docp 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 81e87ebf53a7083344fd962f72d6c4c042b9c39a
4
+ data.tar.gz: f19853fb32210c0dead362e9b402da31907ef771
5
+ SHA512:
6
+ metadata.gz: 25730126b620964b8013a945fb120d7ee6bd64c0358e418c2972680af5d378fd3e0ca09b771e90dc03d5151ed286c2a2d76fbbdaebf68e868ba800bc07a33ec7
7
+ data.tar.gz: f6c7c10ce504e706e210db8f2db43341ef7554199e75dd78d0bf84d1184b5dfeac9fe0ce6a61a64dcb1006a257ad954edb04c16977a7103be66583789251c808
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.3.0
4
+ before_install: gem install bundler -v 1.11.2
@@ -0,0 +1,49 @@
1
+ # Contributor Code of Conduct
2
+
3
+ As contributors and maintainers of this project, and in the interest of
4
+ fostering an open and welcoming community, we pledge to respect all people who
5
+ contribute through reporting issues, posting feature requests, updating
6
+ documentation, submitting pull requests or patches, and other activities.
7
+
8
+ We are committed to making participation in this project a harassment-free
9
+ experience for everyone, regardless of level of experience, gender, gender
10
+ identity and expression, sexual orientation, disability, personal appearance,
11
+ body size, race, ethnicity, age, religion, or nationality.
12
+
13
+ Examples of unacceptable behavior by participants include:
14
+
15
+ * The use of sexualized language or imagery
16
+ * Personal attacks
17
+ * Trolling or insulting/derogatory comments
18
+ * Public or private harassment
19
+ * Publishing others' private information, such as physical or electronic
20
+ addresses, without explicit permission
21
+ * Other unethical or unprofessional conduct
22
+
23
+ Project maintainers have the right and responsibility to remove, edit, or
24
+ reject comments, commits, code, wiki edits, issues, and other contributions
25
+ that are not aligned to this Code of Conduct, or to ban temporarily or
26
+ permanently any contributor for other behaviors that they deem inappropriate,
27
+ threatening, offensive, or harmful.
28
+
29
+ By adopting this Code of Conduct, project maintainers commit themselves to
30
+ fairly and consistently applying these principles to every aspect of managing
31
+ this project. Project maintainers who do not follow or enforce the Code of
32
+ Conduct may be permanently removed from the project team.
33
+
34
+ This code of conduct applies both within project spaces and in public spaces
35
+ when an individual is representing the project or its community.
36
+
37
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
38
+ reported by contacting a project maintainer at tarou1y@gmail.com. All
39
+ complaints will be reviewed and investigated and will result in a response that
40
+ is deemed necessary and appropriate to the circumstances. Maintainers are
41
+ obligated to maintain confidentiality with regard to the reporter of an
42
+ incident.
43
+
44
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
45
+ version 1.3.0, available at
46
+ [http://contributor-covenant.org/version/1/3/0/][version]
47
+
48
+ [homepage]: http://contributor-covenant.org
49
+ [version]: http://contributor-covenant.org/version/1/3/0/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in docp.gemspec
4
+ gemspec
@@ -0,0 +1,17 @@
# Guardfile for docp: re-runs Minitest files when tests or library code change.

# Lists the integration test files under test/integration.
#
# @param resource [Symbol, String] :all for every entry in test/integration,
#   otherwise a prefix that is globbed as "test/integration/<resource>_*.rb"
# @return [Array<String>] matching paths (empty when nothing matches)
def integration_tests(resource = :all)
  pattern = resource == :all ? "test/integration/*" : "test/integration/#{resource}_*.rb"
  Dir[pattern]
end

guard :minitest do
  # A changed test file maps back to a path built from the first capture.
  watch(%r{^test/(.*)/?(.*)_test\.rb$}) { |m| "test/#{m[1]}_test.rb" }
  # A changed library file maps to the test named after it.
  watch(%r{^lib/docp/(.*)\.rb$}) { |m| "test/#{m[1]}_test.rb" }

  watch(%r{^test/(.*)/integration/?(.*)_test\.rb$}) { |m| "test/#{m[1]}_test.rb" }
  # Any library change also maps to the full integration file list.
  watch(%r{^lib/docp/(.*)\.rb$}) { integration_tests }
end
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 dalks
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,41 @@
1
+ # Docp
2
+
3
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/docp`. To experiment with that code, run `bin/console` for an interactive prompt.
4
+
5
+ TODO: Delete this and the text above, and describe your gem
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'docp'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install docp
22
+
23
+ ## Usage
24
+
25
+ TODO: Write usage instructions here
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/akiaki0/docp. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
36
+
37
+
38
+ ## License
39
+
40
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
41
+
@@ -0,0 +1,10 @@
# Rake tasks for the docp gem: Bundler's gem tasks plus a Minitest runner.
require "bundler/gem_tasks"
require "rake/testtask"

# `rake test` runs every test/**/*_test.rb with test/ and lib/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << "test"
  t.libs << "lib"
  t.test_files = FileList['test/**/*_test.rb']
end

# The suite is registered as :test; this file defines no :spec task, so the
# default task must point at :test for a bare `rake` to work.
task :default => :test
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "docp"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,37 @@
# coding: utf-8
# Gem specification for docp.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'docp/version'

Gem::Specification.new do |spec|
  spec.name          = "docp"
  spec.version       = Docp::VERSION
  spec.authors       = ["akiaki0"]
  spec.email         = ["akiaki0pon@gmail.com"]

  spec.summary       = %q{html table parse gem}
  spec.description   = %q{html table parse gem}
  spec.homepage      = "https://github.com/akiaki0/docp"
  spec.license       = "MIT"

  # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
  # delete this section to allow pushing this gem to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  # end

  # Package every tracked file except test/spec/features directories.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # lib/docp.rb does `require 'nokogiri'` at load time, so Nokogiri has to be
  # installed together with the gem rather than only for development.
  spec.add_dependency "nokogiri"

  spec.add_development_dependency "bundler", "~> 1.11"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
  spec.add_development_dependency 'minitest-reporters', '1.0.5'
  spec.add_development_dependency 'mini_backtrace', '0.1.3'
  spec.add_development_dependency 'guard-minitest', '2.3.1'
end
@@ -0,0 +1,15 @@
1
+ require 'forwardable'
2
+ require 'nokogiri'
3
+ require "docp/version"
4
+ require "docp/table_header"
5
+ require "docp/table"
6
+
# Core extension used by Docp when comparing cell text against header patterns.
class String
  # Returns a copy of the string with every character matching the POSIX
  # bracket classes [:space:] or [:cntrl:] removed.
  #
  # @return [String] a new string; the receiver is not modified
  def del_space
    gsub(/[[:space:][:cntrl:]]/, "")
  end
end
12
+
13
+ module Docp
14
+ # Your code goes here...
15
+ end
@@ -0,0 +1,183 @@
require 'docp/table_doc'
require 'docp/table_row'
require 'docp/table_remove_methods'

module Nokogiri
  class XML::Element
    # Every descendant <td>/<th> element of this node, in document order.
    #
    # @return [Array<Nokogiri::XML::Element>]
    def row_elements
      search('*').select { |el| ['td', 'th'].include?(el.name) }
    end
  end
end

module Docp
  # One parsed HTML table. The matched rows are copied into a fresh <table>
  # element (appended to the shared fragment +doc+) and every cell is tagged,
  # via its class attribute, with the name(s) of the column(s) it belongs to.
  class Table
    class << self
      # Parses every matching table of +parse_doc+ into a TableDoc.
      #
      # @param parse_doc [String, #to_html, #parser] HTML source
      # @param header_parser [TableHeader] header/column definition
      # @yield [Table] each table as soon as it has been parsed
      # @return [TableDoc]
      def parse(parse_doc, header_parser, &block)
        TableDoc.new.parse(parse_doc, header_parser, &block)
      end

      # Scans +src_doc+ for tables whose header row satisfies +header_parser+.
      #
      # Tables that contain another table are skipped. Within a table the rows
      # are scanned top-down; scanning stops at the first row matching the
      # exclude pattern, or right after the first row that matches the include
      # pattern and all required columns.
      #
      # @param src_doc [String, #to_html, #parser] HTML string, or an object
      #   that can produce HTML (directly or through its +parser+)
      # @param header_parser [TableHeader, Hash] a Hash is wrapped in a TableHeader
      # @yield [table, header_tr, header_index] the table element, its header
      #   row and that row's index among the table's <tr> elements
      def find(src_doc, header_parser)
        header_parser = TableHeader.new(nil, header_parser) if header_parser.is_a?(Hash)
        src_doc = Nokogiri::HTML(src_doc) if src_doc.is_a?(String)
        # Re-parse from HTML so later mutations happen on a private copy.
        html = src_doc.respond_to?(:to_html) ? src_doc.to_html : src_doc.parser.to_html
        parse_doc = Nokogiri::HTML(html)
        parse_doc.search('//table').each do |table|
          # 'table' already covers every nested table, including those below a <tr>.
          next if table.at('table')
          table.search('tr').each_with_index do |tr, header_index|
            break if header_parser.exclude_ptn?(tr)
            next unless header_parser.include_ptn?(tr)
            next unless header_parser.required_all?(tr)
            yield table, tr, header_index
            break
          end
        end
      end

      # Placeholder kept for interface compatibility; it has no implementation
      # and always returns nil.
      def header_required_all?(header_tr, header_parser)
      end
    end

    extend TableRemoveMethods
    extend Forwardable
    include TableRemoveMethods
    include Enumerable

    class HeaderCountNotMatchError < StandardError; end
    class RequiredAttributesUndefined < StandardError; end

    def_delegators :@this_table, :at, :search, :elements, :row_elements
    attr_reader :doc
    attr_reader :header_required_undefineds

    # @param doc [Nokogiri::XML::Node] fragment the rebuilt table is appended to
    # @param header_parser [TableHeader] its +child+ is used when one exists
    # @param table [Nokogiri::XML::Element] source <table>
    # @param header_tr [Nokogiri::XML::Element] matched header row
    # @param header_index [Integer] index of +header_tr+ among the table's rows
    def initialize(doc, header_parser, table, header_tr, header_index)
      @doc = doc
      @this_table = Nokogiri::XML::Element.new("table", @doc)
      @header_parser = header_parser.child || header_parser
      parse_table(table, header_tr, header_index) if @header_parser.columns.any?
    end

    # Runs the optional before_parse hook, strips attributes from the source
    # table and rebuilds it in vertical or horizontal layout.
    #
    # @return [self]
    def parse_table(table, header_tr, header_index)
      @header_parser.before_parse.call(table) if @header_parser.before_parse
      doc_remove_attributes(table)
      if @header_parser.vertical
        build_vertical_table(table)
      else
        build_horizontal_table(table, header_tr, header_index)
      end
      self
    end

    # Column names attached to the given cells (class attribute split on ",").
    #
    # @return [Array<Symbol>]
    def get_row_class_names(tr_elements)
      tr_elements.map { |td| td[:class].split(",").map(&:to_sym) if td[:class] }.compact.flatten
    end

    # True when the cells carry at least as many required-column names as the
    # header parser lists required keys.
    def row_required_all?(tr_elements)
      required = @header_parser.required_keys
      present = get_row_class_names(tr_elements).select { |name| required.include?(name) }
      present.count >= required.count
    end

    # Wraps a <tr> in a TableRow bound to this table's header parser.
    def extend_row(tr)
      TableRow.new(tr, @header_parser)
    end

    # Error markers recorded on rows by #set_row_attributes.
    #
    # @return [Array<String>]
    def errors
      @this_table.search('tr').map { |tr| tr[:error] }.compact
    end

    # Yields a TableRow for every row that has tagged cells and satisfies the
    # required columns.
    #
    # The header row is skipped only when +args[:header]+ is nil; with any
    # other value it goes through the same checks as a data row. (Previously a
    # truthy :header looked for a ".table-header" *descendant* of each row,
    # found none, and crashed wrapping nil.)
    #
    # @param args [Hash] :header - non-nil to include the header row
    # @return [Enumerator] when no block is given
    def each(args = {})
      return enum_for(:each, args) unless block_given?

      @this_table.search('tr').each do |tr|
        next if args[:header].nil? && tr[:class] == "table-header"
        cells = tr.row_elements
        next if cells.none? { |td| td[:class] }
        yield extend_row(tr) if row_required_all?(cells)
      end
    end

    # The header row wrapped as a TableRow.
    def header
      extend_row @this_table.at('.table-header')
    end

    # Collects the rows produced by #each, yielding each one when a block is given.
    #
    # @return [Array<TableRow>]
    def rows(args = {})
      collected = []
      each(args) do |row|
        yield row if block_given?
        collected << row
      end
      collected
    end

    alias :rows_each :each
    alias :rows_each_with_index :each_with_index

    # Tags each header cell with the name of every column whose include
    # pattern matches it (comma-separated when several match).
    def set_header_attributes(tr)
      tr.row_elements.each do |td|
        @header_parser.columns.each do |col|
          next unless col.include_ptn?(td)
          td[:class] = td[:class] ? "#{td[:class]},#{col.name}" : col.name
        end
      end
    end

    # Copies the header cells' class attributes onto the cells at the same
    # position in every data row and records an error marker on rows whose
    # cell count differs from the header or that miss required columns.
    def set_row_attributes(header_tr, tr_rows)
      header_cells = header_tr.row_elements
      tr_rows.each do |tr|
        cells = tr.row_elements
        tr[:error] = HeaderCountNotMatchError.to_s if header_cells.count != cells.count
        header_cells.each_with_index do |h, x|
          next if h[:class].nil? || cells[x].nil?
          cells[x][:class] = h[:class]
        end
        # Assigned last, so it overrides the count-mismatch marker.
        tr[:error] = RequiredAttributesUndefined.to_s unless row_required_all?(cells)
      end
    end

    private

    # Vertical layout: each matching cell is a label and the element after it
    # is its value. Builds one header row of labels and one row of values.
    def build_vertical_table(table)
      header_tr = @this_table.add_child Nokogiri::XML::Element.new("tr", @doc)
      row_tr = @this_table.add_child Nokogiri::XML::Element.new("tr", @doc)
      header_tr[:class] = "table-header"
      table.row_elements.each do |td|
        col = @header_parser.columns.find { |c| c.include_ptn?(td) }
        next unless col
        label = td.clone
        label[:class] = col.name
        header_tr.add_child label
        value = td.next_element
        next unless value
        value[:class] = value[:class] ? "#{value[:class]},#{col.name}" : col.name
        row_tr.add_child value.clone
      end
      @doc.add_child(@this_table)
    end

    # Horizontal layout: moves the header row and everything after it into the
    # rebuilt table, then tags header and data cells.
    def build_horizontal_table(table, header_tr, header_index)
      moved_rows = table.search('tr')[header_index..-1]
      return unless moved_rows
      header_tr[:class] = "table-header"
      @this_table.add_child moved_rows
      set_header_attributes(header_tr)
      set_row_attributes(header_tr, @this_table.search('tr')[1..-1])
      @doc.add_child(@this_table)
    end
  end
end
@@ -0,0 +1,41 @@
module Docp
  # Collection of the Table objects parsed from one source document. All tables
  # are appended to a single shared document fragment (+doc+).
  class TableDoc
    include Enumerable
    attr_reader :doc, :tables

    def initialize
      @doc = Nokogiri::HTML::DocumentFragment.parse ""
      @tables = []
    end

    # Finds every table matching +header_parser+ and stores it as a Docp::Table.
    #
    # @param parse_doc [Object] source passed through to Docp::Table.find
    # @param header_parser [Object] header definition passed to find and Table.new
    # @yield [Docp::Table] each table right after it has been built
    # @return [self]
    def parse(parse_doc, header_parser, &block)
      Docp::Table.find(parse_doc, header_parser) do |table, header_tr, header_index|
        @tables << Docp::Table.new(@doc, header_parser, table, header_tr, header_index)
        block.call(@tables.last) if block
      end
      self
    end

    # Iterates over the parsed tables.
    #
    # @return [Enumerator] when called without a block
    def each(&block)
      return enum_for(:each) unless block

      @tables.each(&block)
    end

    # All rows of all tables, in table order; each row is also yielded when a
    # block is given.
    #
    # @return [Array]
    def rows
      @tables.flat_map(&:rows).each { |row| yield row if block_given? }
    end

    # Non-nil +header_required_undefineds+ values reported by the tables.
    def header_required_undefineds
      @tables.map(&:header_required_undefineds).compact
    end

    # True when no table was parsed.
    def empty?
      @tables.empty?
    end

    # True when at least one table was parsed; an optional block (or pattern)
    # is forwarded to Array#any? instead of being silently ignored.
    def any?(*args, &block)
      @tables.any?(*args, &block)
    end
  end
end
@@ -0,0 +1,134 @@
require 'docp/table_header_ptn'

module Docp
  # Describes the header of a table to extract: the columns to look for, which
  # of them are required, and table/row level options.
  class TableHeader
    include TableHeaderPtn

    attr_reader :columns
    attr_reader :child
    attr_reader :required_attributes

    # Table options
    attr_accessor :before_parse
    attr_accessor :vertical

    # Row options
    attr_accessor :default_format
    attr_accessor :after_to_hash

    alias :required_keys :required_attributes

    # @param match [Hash, nil] match details handed to the block (set when this
    #   header is created as a child from a match_block)
    # @param args [Hash] option name => value; each pair is applied through the
    #   corresponding "<name>=" writer
    # @yield [header, match] optional configuration block
    def initialize(match = nil, args = {}, &block)
      @columns = []
      @required_attributes = []
      args.each { |key, value| send("#{key}=", value) }
      @default_format ||= :text
      # The block is optional: Table.find builds a header from a bare Hash.
      block.call(self, match) if block
      set_required_columns
    end

    # Stores the block when one is given; always returns the stored block.
    def match_block(&block)
      @match_block = block if block_given?
      @match_block
    end

    # Tests whether +tr+ contains an element matching one of the include
    # patterns (the header-level pattern, or otherwise the columns' patterns).
    #
    # When a match_block is stored, a child TableHeader is built from it on the
    # first match; the result is then whether that child does NOT exclude the row.
    #
    # @return [Boolean]
    def include_ptn?(tr)
      patterns = [@include_ptn || @columns.map(&:include_ptn)].flatten
      patterns.each_with_index do |ptn, i|
        next unless ptn?(tr.search('*'), ptn)
        @child = TableHeader.new({ptn: ptn, index: i, tr: tr}, &@match_block) if @match_block
        return @child ? !@child.exclude_ptn?(tr.search('*')) : true
      end
      false
    end

    # Looks a column up by name.
    #
    # @param name [Symbol, String]
    # @return [Column, nil]
    def [](name)
      name = name.to_sym if name.is_a?(String)
      @columns.find { |col| col.name == name }
    end

    # Synchronises every column's +required+ flag with +required_attributes+.
    # With no required attributes, all currently required columns are cleared.
    def set_required_columns
      if @required_attributes.any?
        @columns.each { |col| col.required = @required_attributes.include?(col.name) }
      else
        required_columns.each { |col| col.required = false }
      end
    end

    # @param names [Symbol, Array<Symbol>, nil] required column name(s)
    def required_attributes=(names)
      @required_attributes = [names].flatten.compact
      set_required_columns
    end

    # @return [Array<Column>] columns currently flagged as required
    def required_columns
      @columns.select { |col| col.required }
    end

    # True when there are no required columns, or when at least as many
    # required columns match a cell of +tr+ as there are required keys.
    #
    # @return [Boolean]
    def required_all?(tr)
      return true if required_columns.empty?
      matched = required_columns.count { |col| col.include_ptn?(tr.row_elements) }
      matched >= required_keys.count
    end

    # Names of the columns flagged +no_hash+.
    def no_hash_keys
      @columns.select(&:no_hash).map(&:name)
    end

    # Adds a column (and any children registered on it inside the block).
    #
    # @param h [Hash] first pair is name => include pattern, followed by options
    # @yield [Column] the new column, before it is stored
    # @return [Column]
    def add(h)
      col = Column.new(h.merge(default_format: @default_format))
      yield col if block_given?
      @columns.push(*[col, col.children].flatten)
      col
    end

    # Replaces the existing column that has the same name.
    #
    # @raise [ArgumentError] when no column with that name exists
    def swap(h)
      col = Column.new(h.merge(default_format: @default_format))
      index = @columns.index { |existing| existing.name == col.name }
      raise ArgumentError, "#{col.name} ColumnNotFound" unless index
      @columns[index] = col
    end

    # A single column definition.
    class Column
      include TableHeaderPtn
      attr_reader :name
      attr_reader :include_ptn
      attr_reader :no_hash
      attr_reader :children

      attr_accessor :required
      attr_accessor :format

      # @param hash [Hash] first pair is name => include pattern; it is removed
      #   from +hash+ (callers in this file always pass a fresh merged Hash)
      def initialize(hash)
        @children = []
        @name, @include_ptn = hash.shift
        @format = hash[:format] || hash[:default_format]
        @no_hash = hash[:no_hash]
      end

      # Registers a child column without an include pattern and flags this
      # column as +no_hash+.
      #
      # @return [Array<Column>] the children collected so far
      def add(name, hash)
        @no_hash = true
        @children << Column.new({ name => nil }.merge(hash))
      end
    end
  end
end
@@ -0,0 +1,32 @@
module Docp
  # Pattern-matching helpers mixed into TableHeader and TableHeader::Column.
  module TableHeaderPtn
    attr_accessor :include_ptn, :exclude_ptn
    # Optional callable that turns an element into the text to compare;
    # when unset, the element's text with whitespace removed is used.
    attr_accessor :after_check_val

    # Tests one element against a pattern.
    #
    # @param elem [#text] element to test
    # @param ptn [Regexp, String, Array<Regexp, String>, nil] a Regexp is
    #   matched with =~, anything else compared with ==
    # @return [Integer, Object, nil] the match offset for a bare Regexp, the
    #   first matching entry for other patterns, nil when nothing matches
    def check_ptn(elem, ptn)
      text = after_check_val ? after_check_val.call(elem) : elem.text.del_space
      return text =~ ptn if ptn.is_a?(Regexp)

      [ptn].flatten.find do |candidate|
        candidate.is_a?(Regexp) ? text =~ candidate : text == candidate
      end
    end

    # First of +elems+ (a single element or a list) that matches +ptn+.
    #
    # @return [Object, nil]
    def ptn?(elems, ptn)
      [elems].flatten.find { |el| check_ptn(el, ptn) }
    end

    # First element matching the exclude pattern; nil when no exclude pattern
    # is set or nothing matches.
    def exclude_ptn?(node)
      ptn?(node, @exclude_ptn) if @exclude_ptn
    end

    # First element matching the include pattern, or nil.
    def include_ptn?(node)
      ptn?(node, @include_ptn)
    end
  end
end
@@ -0,0 +1,79 @@
module Docp
  # Helpers that clean up or restructure table markup in place.
  module TableRemoveMethods
    # Removes elements whose style attribute contains "display:none", then
    # deletes every attribute from the <tr>, <th> and <td> elements found
    # through +remove_doc+.
    #
    # NOTE(review): the XPath starts with "//", which is evaluated from the
    # document root rather than from +remove_doc+ - confirm that is intended.
    def doc_remove_attributes(remove_doc)
      hidden = "//*[contains(@style,'display:none')]"
      remove_doc.search(hidden).remove
      remove_doc.search('tr', 'th', 'td').each do |cell|
        cell.attributes.each_key { |attr_name| cell.delete(attr_name) }
      end
    end

    # For every cell carrying a colspan attribute that has a following row:
    # inserts, right after the cell, copies of the next row's first +colspan+
    # cells whose text is "<cell text> <lower cell text>", removes those lower
    # cells from the next row, and finally removes the spanning cell itself.
    def colspan_join(parse_doc)
      parse_doc.search('tr').each do |tr|
        next_tr = tr.next_element
        tr.elements.each do |td|
          next if td[:colspan].nil? || next_tr.nil?
          col_depth = td[:colspan].to_i - 1
          # Walk right-to-left so the copies end up in left-to-right order
          # (each one is inserted directly after the spanning cell).
          consumed = col_depth.downto(0).map do |xx|
            below = next_tr.elements[xx]
            next if below.nil?
            td.next = below.clone.tap { |e| e.content = td.text + " " + below.text }
            below
          end
          consumed.compact.each(&:remove)
          td.remove
        end
      end
    end

    # For every row containing rowspan cells: merges the following
    # (rowspan - 1) rows into it. The text of the n-th cell of a following row
    # is appended to the n-th non-rowspan cell of the row; cells without such a
    # counterpart are moved into the row. Merged rows are removed.
    def rowspan_join(parse_doc)
      parse_doc.search('tr').each do |tr|
        row_depth = 0
        no_rowspans = []
        tr.elements.each do |td|
          if td[:rowspan]
            # The last rowspan cell in the row decides the depth.
            row_depth = td[:rowspan].to_i - 1
          else
            no_rowspans << td
          end
        end
        next unless row_depth > 0

        row_depth.times do
          following = tr.next_element
          next unless following
          following.elements.each_with_index do |td, i|
            if no_rowspans[i]
              no_rowspans[i].content = "#{no_rowspans[i].text} #{td.text}"
            else
              tr.add_child td
            end
          end
          following.remove
        end
      end
    end

    # For every row containing rowspan cells: moves all cells of the following
    # (largest rowspan - 1) rows into the row and removes those rows.
    def rowspan_flatten(parse_doc)
      parse_doc.search('tr').each do |tr|
        row_depth = tr.elements.map { |td| td[:rowspan].to_i - 1 if td[:rowspan] }.compact.max
        next if row_depth.nil?
        row_depth.times do
          following = tr.next_element
          next unless following
          tr.add_child following.elements
          following.remove
        end
      end
    end
  end
end
@@ -0,0 +1,73 @@
module Docp
  # One table row: the tagged cells of a <tr> plus the formatted value of every
  # column defined on the header parser.
  class TableRow
    extend Forwardable
    include Enumerable
    def_delegators :@row, :empty?, :any?, :count
    def_delegators :@tr, :at, :search, :elements, :row_elements
    attr_reader :tr
    attr_reader :formats

    # @param tr [#row_elements, #[]] the row element
    # @param header_parser [#columns, #no_hash_keys, #after_to_hash]
    def initialize(tr, header_parser)
      @tr = tr
      # Only cells tagged with a column name (class attribute) belong to the row.
      @row = row_elements.select { |td| td[:class] }
      @no_hash_keys = header_parser.no_hash_keys
      @after_to_hash = header_parser.after_to_hash
      @formats = {}
      header_parser.columns.each do |col|
        [col, col.children].flatten.each do |ch|
          # Header rows are always rendered as plain text.
          @formats[ch.name] = format(self[ch.name], header_row? ? :text : ch.format)
        end
      end
    end

    # Cell lookup by column name (Symbol/String, matched against the cell's
    # comma-separated class attribute) or by position among the tagged cells.
    #
    # @return [Object] the cell, or a new empty <td> element when none is found
    def [](name)
      found =
        case name
        when Symbol, String
          key = name.to_s
          @row.find { |cell| cell[:class] == key || cell[:class].split(',').include?(key) }
        else
          @row[name]
        end
      found || Nokogiri::XML::Element.new("td", Nokogiri::HTML::DocumentFragment.parse(""))
    end

    # Applies a column format to a cell.
    #
    # * Symbol - sent to the cell (e.g. :text)
    # * Proc   - called with arguments resolved from its parameter names:
    #            +row+ is this TableRow, any other name is evaluated in this
    #            method's scope (e.g. +td+, +formats+, +tr+). A Proc that asks
    #            for +formats+ is wrapped in a lambda and only run by #to_hash,
    #            once all formats have been computed.
    # * other  - returned unchanged
    def format(td, format)
      case format
      when Symbol
        td.send(format)
      when Proc
        names = format.parameters.map(&:last)
        # NOTE: eval resolves the Proc's own parameter names; those come from
        # the format definition, so formats must never be built from untrusted input.
        resolve = -> { format.call(*names.map { |name| name == :row ? self : eval(name.to_s) }) }
        names.include?(:formats) ? resolve : resolve.call
      else
        format
      end
    end

    # Iterates over the tagged cells.
    #
    # @return [Enumerator] when called without a block
    def each(&block)
      return enum_for(:each) unless block

      @row.each(&block)
    end

    # Column name => formatted value, without the +no_hash+ columns. Deferred
    # formats are evaluated here; +after_to_hash+, when set, is then called
    # with the hash and this row.
    #
    # @return [Hash]
    def to_hash
      hash = {}
      @formats.each do |key, value|
        next if @no_hash_keys.include?(key)
        hash[key] = value.is_a?(Proc) ? value.call : value
      end
      @after_to_hash.call(hash, self) if @after_to_hash
      hash
    end

    private

    # True when the wrapped <tr> is marked as the table header.
    def header_row?
      @tr[:class] == "table-header"
    end
  end
end
@@ -0,0 +1,3 @@
module Docp
  # Gem version; read by docp.gemspec.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,161 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: docp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - akiaki0
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-05-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.11'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '5.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '5.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: minitest-reporters
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 1.0.5
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 1.0.5
83
+ - !ruby/object:Gem::Dependency
84
+ name: mini_backtrace
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 0.1.3
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 0.1.3
97
+ - !ruby/object:Gem::Dependency
98
+ name: guard-minitest
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '='
102
+ - !ruby/object:Gem::Version
103
+ version: 2.3.1
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '='
109
+ - !ruby/object:Gem::Version
110
+ version: 2.3.1
111
+ description: html table parse gem
112
+ email:
113
+ - akiaki0pon@gmail.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".gitignore"
119
+ - ".travis.yml"
120
+ - CODE_OF_CONDUCT.md
121
+ - Gemfile
122
+ - Guardfile
123
+ - LICENSE.txt
124
+ - README.md
125
+ - Rakefile
126
+ - bin/console
127
+ - bin/setup
128
+ - docp.gemspec
129
+ - lib/docp.rb
130
+ - lib/docp/table.rb
131
+ - lib/docp/table_doc.rb
132
+ - lib/docp/table_header.rb
133
+ - lib/docp/table_header_ptn.rb
134
+ - lib/docp/table_remove_methods.rb
135
+ - lib/docp/table_row.rb
136
+ - lib/docp/version.rb
137
+ homepage: https://github.com/akiaki0/docp
138
+ licenses:
139
+ - MIT
140
+ metadata: {}
141
+ post_install_message:
142
+ rdoc_options: []
143
+ require_paths:
144
+ - lib
145
+ required_ruby_version: !ruby/object:Gem::Requirement
146
+ requirements:
147
+ - - ">="
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
150
+ required_rubygems_version: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ version: '0'
155
+ requirements: []
156
+ rubyforge_project:
157
+ rubygems_version: 2.5.1
158
+ signing_key:
159
+ specification_version: 4
160
+ summary: html table parse gem
161
+ test_files: []