ripcols 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: bea749aa6951592e0f088129785105768125e22b1a3152aa7c3d6115f67248a8
4
+ data.tar.gz: 3d4770babf2b19aab4fdb1a32f0f7c4b44bc4ae9ebb87adc13412d1668940e27
5
+ SHA512:
6
+ metadata.gz: b42e2a8d38917700901906abdbd0e6e10e88d3e4db265c38644979dd02ebc022865aeccbce37f1bb2f4a436c3129db4091b4c16db175ef87ac955217256c256a
7
+ data.tar.gz: e656032c6cd11b6b61a4195e66efbc48788b1c515ffeea1d99e11741febbbd1a69c484f225c4aaa1a606ffd7c359b712d68e177d838643bf56cf66bfff11c5a3
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+
13
+ # bundler
14
+ vendor/bundle
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ sudo: false
3
+ language: ruby
4
+ cache: bundler
5
+ rvm:
6
+ - 2.6.3
7
+ before_install: gem install bundler -v 2.0.2
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in ripcols.gemspec
4
+ gemspec
5
+ gem 'irb', require: false
data/Gemfile.lock ADDED
@@ -0,0 +1,37 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ ripcols (0.1.4)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.3)
10
+ irb (1.0.0)
11
+ rake (10.5.0)
12
+ rspec (3.8.0)
13
+ rspec-core (~> 3.8.0)
14
+ rspec-expectations (~> 3.8.0)
15
+ rspec-mocks (~> 3.8.0)
16
+ rspec-core (3.8.2)
17
+ rspec-support (~> 3.8.0)
18
+ rspec-expectations (3.8.4)
19
+ diff-lcs (>= 1.2.0, < 2.0)
20
+ rspec-support (~> 3.8.0)
21
+ rspec-mocks (3.8.1)
22
+ diff-lcs (>= 1.2.0, < 2.0)
23
+ rspec-support (~> 3.8.0)
24
+ rspec-support (3.8.2)
25
+
26
+ PLATFORMS
27
+ ruby
28
+
29
+ DEPENDENCIES
30
+ bundler (~> 2.0)
31
+ irb
32
+ rake (~> 10.0)
33
+ ripcols!
34
+ rspec (~> 3.0)
35
+
36
+ BUNDLED WITH
37
+ 2.0.2
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2019 noor-rahim
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,36 @@
1
+ # Ripcols
2
+
3
+
4
+ ## Installation
5
+
6
+ Add this line to your application's Gemfile:
7
+
8
+ ```ruby
9
+ gem 'ripcols'
10
+ ```
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install ripcols
19
+
20
+ ## Usage
21
+
22
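+ In a Ruby script, define the top-level regexp constants `HEADER_BEGIN`, `HEADER_END` and `LINE_END` (other `HEADER_*` / `LINE_*` constants are picked up as well) and `require "ripcols"`; run the script with the path of the input file as its first argument, or pipe the text to stdin, and the parsed rows are printed as JSON when the script exits.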
23
+
24
+ ## Development
25
+
26
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
27
+
28
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
29
+
30
+ ## Contributing
31
+
32
+ Bug reports and pull requests are welcome on GitHub at https://github.com/noor-rahim/ripcols.
33
+
34
+ ## License
35
+
36
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "ripcols"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/lib/ripcols.rb ADDED
@@ -0,0 +1,67 @@
1
+ require "json"
2
+ require "ripcols/ripper"
3
+ require "ripcols/version"
4
+
5
module Ripcols
  # Script-mode entry point. When the process is about to exit, collect every
  # top-level constant whose name starts with HEADER_ or LINE_, read the input
  # text (file named by the first command-line argument, or stdin when there
  # are no arguments), parse it with Ripper and print the rows as JSON.
  at_exit do
    # todo: print helpful message, rather than exiting directly
    # NOTE(review): at exit time the backtrace appears to hold only frames from
    # this file, so whether this guard fires depends on CALLERS_TO_IGNORE
    # matching this file's installed path -- confirm the intended behaviour.
    req_file = caller_files.first
    exit if req_file.nil?

    pattern_names = Object.constants.select do |name|
      name.to_s.start_with?("HEADER_") || name.to_s.start_with?("LINE_")
    end
    patterns = pattern_names.map { |name| [name, Object.const_get(name)] }.to_h

    fbuf =
      if ::ARGV.empty?
        file_contents($stdin)
      else
        # File.open, not Kernel#open: Kernel#open would spawn a subprocess for
        # an argument beginning with "|".
        File.open(::ARGV.first) { |f| file_contents(f) }
      end

    r = Ripper.new(patterns, fbuf)
    puts JSON.dump(r.parse)
  end

  # The helpers below are public singleton methods; a bare `private` has no
  # effect on methods defined with `def self.`.

  # Returns the full contents of the readable IO +f+.
  # Kept as a separate hook in case the input needs preprocessing later.
  def self.file_contents(f)
    f.read
  end

  # taken from
  # https://github.com/sinatra/sinatra/blob/eee711bce740d38a9a91aa6028688c9a6d74b23b/lib/sinatra/base.rb#L1505

  # Like Kernel#caller but excluding certain magic entries and without
  # line / method information; the resulting array contains filenames only.
  def self.caller_files
    cleaned_caller(1).flatten
  end

  # Backtrace entries whose file matches any of these are skipped.
  CALLERS_TO_IGNORE = [ # :nodoc:
    /^\(.*\)$/,                                         # generated code
    /rubygems\/(custom|core_ext\/kernel)_require\.rb$/, # rubygems require hacks
    /bundler(\/(?:runtime|inline))?\.rb/,               # bundler require hacks
    /<internal:/,                                       # internal in ruby >= 1.9.2
    /src\/kernel\/bootstrap\/[A-Z]/,                    # maglev kernel files
    /ripcols\/lib\/ripcols.rb/,                         # this file
  ]

  # Like Kernel#caller but excluding certain magic entries.
  # Each entry is split into at most [file, line, method] and truncated to the
  # first +keep+ parts.
  def self.cleaned_caller(keep = 3)
    Kernel.caller(1)
      .map! { |line| line.split(/:(?=\d|in )/, 3)[0, keep] }
      .reject { |file, *_| CALLERS_TO_IGNORE.any? { |pattern| file =~ pattern } }
  end
end
@@ -0,0 +1,199 @@
1
module Ripcols

  # Extracts rows from a whitespace-aligned text table.
  #
  # The table is located inside a larger string with three required regexps:
  # HEADER_BEGIN (start of the header), HEADER_END (delimiter between header
  # and body) and LINE_END (delimiter after the last body line). Header words
  # that overlap horizontally across header lines are merged into one column
  # title; each body line is then split into a Hash of title => text.
  class Ripper
    # Keys that must be present in the patterns hash.
    REQUIRED_PATTERNS = %i[HEADER_BEGIN HEADER_END LINE_END].freeze

    # patterns   : Hash of Symbol => Regexp; must contain REQUIRED_PATTERNS.
    #              Optional keys HEADER_COL_SEP, LINE_COL_SEP and LINE_SEP
    #              override the defaults derived below.
    # str        : the text to parse
    # column_gap : minimum run of whitespace treated as a column separator
    # out_f      : stored in @out_f
    # err_f      : accepted but currently unused
    #
    # Raises ArgumentError when a required pattern key is missing.
    def initialize(patterns, str, column_gap=3, out_f=$stdout, err_f=$stderr)
      unless REQUIRED_PATTERNS.all? { |req_pattern| patterns.key?(req_pattern) }
        raise ArgumentError, "all required keys not present.\n Required keys: #{REQUIRED_PATTERNS}"
      end

      @COL_GAP = column_gap

      @fbuf = str
      @out_f = out_f

      col_del = /\s{#{@COL_GAP},}/
      @patterns = patterns.dup # never mutate the caller's hash
      @patterns[:HEADER_COL_SEP] ||= col_del
      @patterns[:LINE_COL_SEP] ||= col_del
      @patterns[:LINE_SEP] ||= /\n/

      @hbuf = nil
      @bbuf = nil

      @line_column_begin = 0
    end

    # Returns an Array with one Hash (column title => text) per body line.
    def parse
      headers = parse_head
      body_lines.split( @patterns[:LINE_SEP] ).map { |line| columize_line(line, headers) }
    end

    # Returns the column layout as [ [title, bc, ec], ... ] sorted by bc,
    # where bc/ec are the beginning / (exclusive) ending character columns.
    def parse_head
      groups = header_lines.lines.each_with_object([]) do |line, grouping|
        offset = 0
        line.strip.split( @patterns[:HEADER_COL_SEP] ).each do |word|
          next if word.empty?

          # search from the end of the previous word so repeated words on the
          # same line resolve to their own position
          bc = line.index(word, offset)
          offset = ec = bc + word.length
          insert_to( grouping, word, bc, ec )
        end
      end

      columns = groups
        .sort_by { |_, bc, _| bc }
        .map { |titles, bc, ec| [titles.join(' '), bc, ec] }

      # todo: (possible BUG!)
      #   this code will break, when the initial columns dont begin from 0,
      #   its better to have some kind of hinting to know where the column
      #   begins.
      #
      # going with simplicity, beginning_column_position of 1st column be 0,
      columns.first[1] = @line_column_begin if columns.first

      columns
    end

    # Prints the inspected header and body buffers to standard output.
    def debug!
      seperate_body_head
      puts "HEADER\n______"
      p header_lines
      puts "BODY\n____"
      p body_lines
    end


    private

    # line    : single line of string
    # headers : [ (title, bc, ec) ...+ ]
    #
    # OUTPUT
    # ======
    # columized_line : Hash
    #    => {"col1": "matching stripped text", ...* }
    #
    # Note
    # ====
    # blank columns will not be part of the result.
    #
    def columize_line line, headers
      return {} if headers.empty?

      ks = {}
      idx = 0
      delim = @patterns[:LINE_COL_SEP]
      # text found too far right of the header it was tested against, kept
      # until the next header decides where it belongs
      unresolved = nil

      headers.each do |(title, bc, ec)|

        # the pending text ends well before this column begins, so it is
        # credited to the header it was found after
        if unresolved && (unresolved[:text][:ec] + @COL_GAP) < bc
          ks[ unresolved[:header][:title] ] = unresolved[:text][:text]
          idx = unresolved[:text][:ec]
          unresolved = nil
        end

        break unless (bc_idx = line.index( /\S/, idx ))
        ec_idx = line.index( delim, bc_idx ) || -1 # -1: text runs to end of line
        val = line[ ec_idx == -1 ? (bc_idx .. ec_idx) : (bc_idx ... ec_idx) ]
        if (bc_idx - @COL_GAP) <= ec
          unresolved = nil
          idx = ec_idx
          ks[title] = val

          break if ec_idx == -1
        else
          unresolved = {
            text: { text: val, bc: bc_idx, ec: ec_idx },
            header: { title: title, bc: bc, ec: ec },
          }
        end

      end

      # NOTE(review): a value still pending in `unresolved` here is dropped --
      # confirm that is intended.
      ks
    end


    # Locates the header and body inside @fbuf and stores them in @hbuf / @bbuf.
    #
    # Raises ArgumentError when HEADER_BEGIN, HEADER_END or LINE_END cannot be
    # found.
    def seperate_body_head
      hbeg_idx = @patterns[:HEADER_BEGIN] =~ @fbuf
      unless hbeg_idx
        raise ArgumentError, "Failed to locate beginning of Header"
      end

      # drop newlines at the very start so the header begins on its first line
      head_begin_buf = @fbuf[ hbeg_idx .. -1 ].sub(/\A\n+/, '')
      header_end = @patterns[:HEADER_END].match( head_begin_buf )
      unless header_end
        raise ArgumentError,
              "Failed to locate ending of Header (pattern: #{@patterns[:HEADER_END].inspect})"
      end
      # everything before the delimiter; the delimiter itself is in neither
      # the header nor the body
      @hbuf = header_end.pre_match

      bbuf = header_end.post_match.sub(/\A\n+/, '')
      body_end = @patterns[:LINE_END].match( bbuf )
      unless body_end
        raise ArgumentError,
              "Failed to locate ending of lines (pattern: #{@patterns[:LINE_END].inspect})"
      end
      @bbuf = body_end.pre_match
    end

    # Header text, extracted on first use.
    def header_lines
      seperate_body_head unless @hbuf
      @hbuf
    end

    # Body text, extracted on first use.
    def body_lines
      seperate_body_head unless @bbuf
      @bbuf
    end

    # check whether given 2 groups appear within boundaries of each other
    # group = [ title, beginning_column, ending_col ]
    # note: the ending column is exclusive, so groups that merely touch
    #       (a ends where b begins) do not overlap
    def overlap?( group_a, group_b )
      (_, a_bc, a_ec) = group_a
      (_, b_bc, b_ec) = group_b
      a_bc < b_ec && b_bc < a_ec
    end


    # Adds title to the first group in grouping that it overlaps, widening
    # that group to cover [bc, ec); otherwise appends a new group.
    def insert_to( grouping , title, bc, ec )
      group = grouping.find { |candidate| overlap?(candidate, [title, bc, ec]) }
      if group
        group[0].push( title )
        group[1] = [bc, group[1]].min
        group[2] = [ec, group[2]].max
      else
        grouping.push( [[title], bc, ec] )
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Ripcols
  # Release number of the gem; ripcols.gemspec reads it as the spec version.
  VERSION = "0.1.4"
end
data/ripcols.gemspec ADDED
@@ -0,0 +1,34 @@
1
# Put lib/ on the load path so the version constant can be required below.
lib_dir = File.expand_path("lib", __dir__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "ripcols/version"

Gem::Specification.new do |spec|
  spec.name    = "ripcols"
  spec.version = Ripcols::VERSION
  spec.authors = ["noor-rahim"]
  spec.email   = ["noorrahim619@gmail.com"]

  spec.summary     = "a library to parse unstructured table of data, ..."
  spec.description = "..."
  spec.homepage    = "https://github.com/noor-rahim/ripcols"
  spec.license     = "MIT"

  # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  spec.metadata["homepage_uri"]    = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/noor-rahim/ripcols"
  # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."

  # Package every file tracked by git (listed from the gemspec's directory),
  # leaving out anything under test/, spec/ or features/.
  gem_root = File.dirname(File.expand_path(__FILE__))
  tracked_files = Dir.chdir(gem_root) { `git ls-files -z`.split("\x0") }
  spec.files = tracked_files.grep_v(%r{^(test|spec|features)/})

  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 2.0"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ripcols
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ platform: ruby
6
+ authors:
7
+ - noor-rahim
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-08-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ description: "..."
56
+ email:
57
+ - noorrahim619@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - ".travis.yml"
65
+ - Gemfile
66
+ - Gemfile.lock
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - bin/console
71
+ - bin/setup
72
+ - lib/ripcols.rb
73
+ - lib/ripcols/ripper.rb
74
+ - lib/ripcols/version.rb
75
+ - ripcols.gemspec
76
+ homepage: https://github.com/noor-rahim/ripcols
77
+ licenses:
78
+ - MIT
79
+ metadata:
80
+ homepage_uri: https://github.com/noor-rahim/ripcols
81
+ source_code_uri: https://github.com/noor-rahim/ripcols
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ requirements: []
97
+ rubygems_version: 3.0.3
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: a library to parse unstructured table of data, ...
101
+ test_files: []