docp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 81e87ebf53a7083344fd962f72d6c4c042b9c39a
4
+ data.tar.gz: f19853fb32210c0dead362e9b402da31907ef771
5
+ SHA512:
6
+ metadata.gz: 25730126b620964b8013a945fb120d7ee6bd64c0358e418c2972680af5d378fd3e0ca09b771e90dc03d5151ed286c2a2d76fbbdaebf68e868ba800bc07a33ec7
7
+ data.tar.gz: f6c7c10ce504e706e210db8f2db43341ef7554199e75dd78d0bf84d1184b5dfeac9fe0ce6a61a64dcb1006a257ad954edb04c16977a7103be66583789251c808
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.3.0
4
+ before_install: gem install bundler -v 1.11.2
@@ -0,0 +1,49 @@
1
+ # Contributor Code of Conduct
2
+
3
+ As contributors and maintainers of this project, and in the interest of
4
+ fostering an open and welcoming community, we pledge to respect all people who
5
+ contribute through reporting issues, posting feature requests, updating
6
+ documentation, submitting pull requests or patches, and other activities.
7
+
8
+ We are committed to making participation in this project a harassment-free
9
+ experience for everyone, regardless of level of experience, gender, gender
10
+ identity and expression, sexual orientation, disability, personal appearance,
11
+ body size, race, ethnicity, age, religion, or nationality.
12
+
13
+ Examples of unacceptable behavior by participants include:
14
+
15
+ * The use of sexualized language or imagery
16
+ * Personal attacks
17
+ * Trolling or insulting/derogatory comments
18
+ * Public or private harassment
19
+ * Publishing others' private information, such as physical or electronic
20
+ addresses, without explicit permission
21
+ * Other unethical or unprofessional conduct
22
+
23
+ Project maintainers have the right and responsibility to remove, edit, or
24
+ reject comments, commits, code, wiki edits, issues, and other contributions
25
+ that are not aligned to this Code of Conduct, or to ban temporarily or
26
+ permanently any contributor for other behaviors that they deem inappropriate,
27
+ threatening, offensive, or harmful.
28
+
29
+ By adopting this Code of Conduct, project maintainers commit themselves to
30
+ fairly and consistently applying these principles to every aspect of managing
31
+ this project. Project maintainers who do not follow or enforce the Code of
32
+ Conduct may be permanently removed from the project team.
33
+
34
+ This code of conduct applies both within project spaces and in public spaces
35
+ when an individual is representing the project or its community.
36
+
37
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
38
+ reported by contacting a project maintainer at tarou1y@gmail.com. All
39
+ complaints will be reviewed and investigated and will result in a response that
40
+ is deemed necessary and appropriate to the circumstances. Maintainers are
41
+ obligated to maintain confidentiality with regard to the reporter of an
42
+ incident.
43
+
44
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
45
+ version 1.3.0, available at
46
+ [http://contributor-covenant.org/version/1/3/0/][version]
47
+
48
+ [homepage]: http://contributor-covenant.org
49
+ [version]: http://contributor-covenant.org/version/1/3/0/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in docp.gemspec
4
+ gemspec
@@ -0,0 +1,17 @@
1
# Directory that contains this Guardfile. Not referenced by the rules below.
base = File.expand_path("../", __FILE__)

guard :minitest do
  # A changed test file maps to a test path built from the first capture.
  watch(%r{^test/(.*)/?(.*)_test\.rb$}) do |match|
    "test/#{match[1]}_test.rb"
  end

  # A changed library file maps to the test file of the same name.
  watch(%r{^lib/docp/(.*)\.rb$}) do |match|
    "test/#{match[1]}_test.rb"
  end

  # Same mapping for test files that live under an integration directory.
  watch(%r{^test/(.*)/integration/?(.*)_test\.rb$}) do |match|
    "test/#{match[1]}_test.rb"
  end

  # Any library change also triggers every integration test.
  watch(%r{^lib/docp/(.*)\.rb$}) do
    integration_tests()
  end
end
10
+
11
# Lists paths under test/integration (relative to the current directory).
#
# resource - :all (default) to list every entry, or a name prefix; a prefix
#            restricts the result to "test/integration/<prefix>_*.rb".
#
# Returns an Array of path Strings (empty when nothing matches).
def integration_tests(resource = :all)
  pattern = resource == :all ? "test/integration/*" : "test/integration/#{resource}_*.rb"
  Dir[pattern]
end
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 dalks
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,41 @@
1
+ # Docp
2
+
3
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/docp`. To experiment with that code, run `bin/console` for an interactive prompt.
4
+
5
+ TODO: Delete this and the text above, and describe your gem
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'docp'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install docp
22
+
23
+ ## Usage
24
+
25
+ TODO: Write usage instructions here
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/akiaki0/docp. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
36
+
37
+
38
+ ## License
39
+
40
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
41
+
@@ -0,0 +1,10 @@
1
require "bundler/gem_tasks"
require "rake/testtask"

# `rake test` runs every *_test.rb file below test/, with both test/ and
# lib/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << "test"
  t.libs << "lib"
  t.test_files = FileList['test/**/*_test.rb']
end

# Bare `rake` runs the test suite. The default used to point at :spec, a task
# this Rakefile never defines, so `rake` aborted before running anything.
task :default => :test
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "docp"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,37 @@
1
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'docp/version'

Gem::Specification.new do |spec|
  spec.name          = "docp"
  spec.version       = Docp::VERSION
  spec.authors       = ["akiaki0"]
  spec.email         = ["akiaki0pon@gmail.com"]

  spec.summary       = %q{html table parse gem}
  spec.description   = %q{html table parse gem}
  spec.homepage      = "https://github.com/akiaki0/docp"
  spec.license       = "MIT"

  # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
  # delete this section to allow pushing this gem to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  # end

  # Package everything tracked by git except test/spec/features directories.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # lib/docp.rb requires nokogiri when the gem is loaded, so it has to be a
  # runtime dependency; as a development dependency it was not installed for
  # users of the gem and `require "docp"` failed with a LoadError.
  spec.add_dependency "nokogiri"

  spec.add_development_dependency "bundler", "~> 1.11"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
  spec.add_development_dependency 'minitest-reporters', '1.0.5'
  spec.add_development_dependency 'mini_backtrace', '0.1.3'
  spec.add_development_dependency 'guard-minitest', '2.3.1'
end
@@ -0,0 +1,15 @@
1
+ require 'forwardable'
2
+ require 'nokogiri'
3
+ require "docp/version"
4
+ require "docp/table_header"
5
+ require "docp/table"
6
+
7
class String
  # Returns a copy of the string with every whitespace character and every
  # control character removed. The receiver is not modified.
  def del_space
    gsub(/[[:space:][:cntrl:]]+/, '')
  end
end
12
+
13
+ module Docp
14
+ # Your code goes here...
15
+ end
@@ -0,0 +1,183 @@
1
+ require 'docp/table_doc'
2
+ require 'docp/table_row'
3
+ require 'docp/table_remove_methods'
4
+
5
module Nokogiri
  class XML::Element
    # Returns an Array of every descendant element named "td" or "th",
    # in the order `search('*')` reports them.
    def row_elements
      search('*').select { |node| node.name == 'td' || node.name == 'th' }
    end
  end
end
13
+
14
module Docp
  # One parsed HTML table.
  #
  # The rows selected from the source table are added to a fresh <table>
  # element that is appended to +doc+. Every cell that belongs to a known
  # column gets the column name written into its +class+ attribute (several
  # names are joined with ","); TableRow later looks cells up by that class.
  class Table
    class << self
      # Parses +parse_doc+ with a new TableDoc and returns that TableDoc.
      # The optional block is forwarded to TableDoc#parse.
      def parse parse_doc, header_parser, &block
        TableDoc.new.parse(parse_doc, header_parser, &block)
      end

      # Searches +src_doc+ for tables whose header row matches +header_parser+.
      #
      # src_doc       - an HTML String, an object responding to +to_html+, or
      #                 an object whose +parser+ responds to +to_html+.
      # header_parser - a TableHeader, or a Hash of TableHeader options.
      #
      # Yields (table, header_tr, header_index) once per matching table, where
      # header_index is the position of the header <tr> among the table's rows.
      # Tables that contain a nested table are skipped. Scanning a table stops
      # at the first row matching the exclude pattern.
      def find src_doc, header_parser
        header_parser = TableHeader.new(nil, header_parser) if header_parser.is_a?(Hash)
        src_doc = Nokogiri::HTML(src_doc) if src_doc.is_a?(String)
        # Work on a re-parsed copy so the caller's document is left untouched.
        parse_doc = src_doc.respond_to?(:to_html) ? Nokogiri::HTML(src_doc.to_html) : Nokogiri::HTML(src_doc.parser.to_html)
        parse_doc.search('//table').each {|table|
          next if table.at('table') || table.at('tr table')
          table.search('tr').each_with_index do|tr, header_index|
            break if header_parser.exclude_ptn?(tr)
            next unless header_parser.include_ptn?(tr)
            next unless header_parser.required_all?(tr)
            yield table, tr, header_index
            break
          end
        }
      end

      # Placeholder kept for interface compatibility; it has no body and
      # returns nil.
      def header_required_all? header_tr, header_parser

      end
    end

    extend TableRemoveMethods
    extend Forwardable
    include TableRemoveMethods
    include Enumerable
    class HeaderCountNotMatchError < StandardError; end
    class RequiredAttributesUndefined < StandardError; end

    def_delegators :@this_table, :at, :search, :elements, :row_elements
    attr_reader :doc
    # Never assigned in this class, so it currently reads as nil.
    attr_reader :header_required_undefineds

    # doc           - document the rebuilt <table> is created in and appended to.
    # header_parser - TableHeader; its +child+ is used instead when present.
    # table         - source <table> node.
    # header_tr     - header <tr> found by Table.find.
    # header_index  - index of header_tr among the table's rows.
    #
    # Nothing is parsed when the header definition has no columns.
    def initialize doc, header_parser, table, header_tr, header_index
      @doc = doc
      @this_table = Nokogiri::XML::Element.new("table", @doc)
      @header_parser = header_parser.child || header_parser
      if @header_parser.columns.any?
        parse_table table, header_tr, header_index
      end
    end

    # Fills @this_table from +table+ and appends it to +doc+. Returns self.
    #
    # Vertical mode builds exactly two rows: a header row holding clones of the
    # cells that match a column pattern, and a data row holding a clone of the
    # element that follows each of those cells.
    # Otherwise the rows from +header_index+ onwards are added as they are and
    # their cells are tagged with column names.
    def parse_table table, header_tr, header_index
      @header_parser.before_parse.call(table) if @header_parser.before_parse
      doc_remove_attributes(table)
      if @header_parser.vertical
        header_tr = @this_table.add_child Nokogiri::XML::Element.new("tr", @doc)
        row_tr = @this_table.add_child Nokogiri::XML::Element.new("tr", @doc)
        header_tr[:class] = "table-header"
        table.row_elements.each do|td|
          if col = @header_parser.columns.find {|c| c.include_ptn?(td)}
            cltd = td.clone
            cltd[:class] = col.name
            header_tr.add_child cltd.clone
            # A label cell without a following element contributes no value cell.
            if ntd = td.next_element
              ntd[:class] = ntd[:class] ? "#{ntd[:class]},#{col.name}" : col.name
              row_tr.add_child ntd.clone
            end
          end
        end
        @doc.add_child(@this_table)
      else
        if row_elements = table.search('tr')[header_index..-1]
          header_tr[:class] = "table-header"
          @this_table.add_child row_elements
          set_header_attributes(header_tr)
          set_row_attributes(header_tr, @this_table.search('tr')[1..-1])
          @doc.add_child(@this_table)
        end
      end
      self
    end

    # Returns the column names (Symbols) found in the +class+ attributes of
    # +tr_elements+; cells without a class are ignored.
    def get_row_class_names tr_elements
      tr_elements.map {|td|
        next unless td[:class]
        td[:class].split(",").map(&:to_sym)
      }.compact.flatten
    end

    # True when every required key appears among the cell classes.
    #
    # This used to compare counts only, so a class that occurred twice could
    # make up for a required column that was missing altogether.
    def row_required_all? tr_elements
      (@header_parser.required_keys - get_row_class_names(tr_elements)).empty?
    end

    # Wraps a <tr> node in a TableRow bound to this table's header definition.
    def extend_row tr
      TableRow.new(tr, @header_parser)
    end

    # Returns the +error+ attribute values recorded on rows by
    # set_row_attributes.
    def errors
      @this_table.search('tr').map {|tr| tr[:error]}.compact
    end

    # Yields a TableRow for each row that has at least one classed cell and
    # carries every required column.
    #
    # args - Hash; the header row is skipped while args[:header] is nil.
    def each args = {}
      @this_table.search('tr').each {|tr|
        header = tr.at('.table-header')
        next if args[:header].nil? && tr[:class] == "table-header"
        next if tr.row_elements.select {|td| td[:class]}.empty?
        # `at` only looks at descendants of the row, so +header+ is nil for the
        # rows built by parse_table; wrapping nil in a TableRow raised
        # NoMethodError whenever args[:header] was set.
        yield extend_row(header) if args[:header] && header
        if row_required_all?(tr.row_elements)
          yield extend_row(tr)
        end
      }
    end

    # Returns the header row wrapped in a TableRow.
    def header
      extend_row @this_table.at('.table-header')
    end

    # Collects the rows produced by #each into an Array. When a block is given
    # each row is also yielded to it. +args+ is passed through to #each.
    def rows args = {}
      [].tap {|ret|
        each(args) {|row|
          ret << row.tap {|r| yield r if block_given?} }
      }
    end

    alias :rows_each :each
    alias :rows_each_with_index :each_with_index

    # Tags every header cell with the name of each column whose pattern it
    # matches; names accumulate as a comma-separated class.
    def set_header_attributes tr
      tr.row_elements.each {|td|
        @header_parser.columns.each do|col|
          if col.include_ptn?(td)
            td[:class] = td[:class] ? "#{td[:class]},#{col.name}" : col.name
          end
        end
      }
    end

    # Copies the header cells' classes onto the cells at the same position in
    # each data row and records problems in the row's +error+ attribute.
    # A missing required column overwrites a cell-count mismatch.
    def set_row_attributes header_tr, tr_rows
      tr_rows.each {|tr|
        if header_tr.row_elements.count != tr.row_elements.count
          tr[:error] = "#{HeaderCountNotMatchError}"
        end
        header_tr.row_elements.each_with_index do|h, x|
          next if h[:class].nil? || tr.row_elements[x].nil?
          tr.row_elements[x][:class] = h[:class]
        end
        unless row_required_all?(tr.row_elements)
          tr[:error] = "#{RequiredAttributesUndefined}"
        end
      }
    end
  end
end
@@ -0,0 +1,41 @@
1
module Docp
  # Collection of the Table objects found in one source document.
  class TableDoc
    include Enumerable
    attr_reader :doc, :tables

    # Starts with an empty HTML fragment and no tables.
    def initialize
      @doc = Nokogiri::HTML::DocumentFragment.parse ""
      @tables = []
    end

    # Builds a Table for every match reported by Docp::Table.find and stores
    # it in #tables. When a block is given, each new Table is passed to it.
    #
    # Returns self.
    def parse parse_doc, header_parser, &block
      Docp::Table.find(parse_doc, header_parser) do|table, header_tr, header_index|
        @tables << Docp::Table.new(@doc, header_parser, table, header_tr, header_index)
        block.call @tables.last if block
      end
      self
    end

    # Yields each Table. Returns an Enumerator when called without a block
    # (it used to raise LocalJumpError).
    def each
      return enum_for(:each) unless block_given?
      @tables.each {|table| yield table }
    end

    # Returns the rows of all tables as one flat Array; when a block is given
    # every row is also yielded to it.
    def rows &block
      @tables.map(&:rows).flatten.map {|row|
        yield row if block_given?
        row
      }
    end

    # Returns the non-nil +header_required_undefineds+ values of the tables.
    def header_required_undefineds
      @tables.map(&:header_required_undefineds).compact
    end

    # True when no table was found.
    def empty?
      @tables.empty?
    end

    # True when at least one table was found. A block (or pattern argument)
    # is forwarded to Array#any?; previously it was silently ignored.
    def any?(*args, &block)
      @tables.any?(*args, &block)
    end
  end
end
@@ -0,0 +1,134 @@
1
+ require 'docp/table_header_ptn'
2
module Docp
  # Describes the header of a table: the columns to look for, which of them
  # are required, and options applied to the table and its rows.
  class TableHeader
    include TableHeaderPtn

    attr_reader :columns
    attr_reader :child
    attr_reader :required_attributes

    # Table options
    attr_accessor :before_parse
    attr_accessor :vertical

    # Row options
    attr_accessor :default_format
    attr_accessor :after_to_hash

    alias :required_keys :required_attributes

    # match - value handed to the block as its second argument (nil by default).
    # args  - Hash of option names to values; each pair is applied through the
    #         matching "name=" writer.
    # block - optional; called with (self, match) to declare the columns.
    #
    # The block is optional: Docp::Table.find builds a header from a plain
    # Hash without one, which used to raise NoMethodError on nil.
    def initialize match = nil, args = {}, &block
      @columns = []
      @required_attributes = []
      args.each {|k, v| send("#{k}=", v) }
      @default_format ||= :text
      block.call(self, match) if block
      set_required_columns
    end

    # Stores the given block (when one is passed) and returns the stored block.
    # The stored block is used by #include_ptn? to build a child header.
    def match_block &block
      @match_block = block if block_given?
      @match_block
    end

    # Tests the descendants of +tr+ against the header patterns: include_ptn
    # when set, otherwise the patterns of all columns.
    #
    # On the first hit, and only when a match block is stored, a child header
    # is built from that block and the result is true unless the child's
    # exclude pattern matches. Without a match block a previously built child,
    # if any, is consulted the same way. Returns false when no pattern matches.
    def include_ptn? tr
      patterns = [@include_ptn || @columns.map(&:include_ptn)].flatten
      patterns.each_with_index do|ptn, i|
        next unless ptn?(tr.search('*'), ptn)
        match = {ptn: ptn, index: i, tr: tr}
        @child = TableHeader.new match, &@match_block if @match_block
        return @child ? !@child.exclude_ptn?(tr.search('*')) : true
      end
      false
    end

    # Returns the column called +name+ (String or Symbol), or nil.
    def [] name
      name = name.to_sym if name.is_a?(String)
      @columns.find {|col| col.name == name}
    end

    # Synchronises each column's +required+ flag with required_attributes.
    # With no required attributes, every column currently flagged is cleared.
    def set_required_columns
      if @required_attributes.any?
        @columns.each do|col|
          col.required = @required_attributes.include?(col.name)
        end
      else
        required_columns.each {|col| col.required = false }
      end
    end

    # Accepts a single name or an Array of names; nil entries are dropped.
    def required_attributes= names
      @required_attributes = [names].flatten.compact
      set_required_columns
    end

    # Columns currently flagged as required.
    def required_columns
      @columns.select {|col| col.required}
    end

    # Returns true when there are no required columns, or when at least as
    # many required columns match the cells of +tr+ as there are required
    # keys; returns nil otherwise.
    def required_all? tr
      return true if required_columns.empty?
      cols = required_columns.select {|col| col.include_ptn?(tr.row_elements) }
      true if cols.count >= required_keys.count
    end

    # Names of the columns flagged +no_hash+.
    def no_hash_keys
      @columns.select(&:no_hash).map(&:name)
    end

    # Declares a column. The first pair of +h+ is name => pattern; the
    # remaining keys are column options. The new column is yielded when a
    # block is given (so children can be added), then the column and its
    # children are appended to #columns. Returns the column.
    def add h
      col = Column.new(h.merge(default_format: @default_format))
      yield col if block_given?
      @columns.push(*[col, col.children].flatten)
      col
    end

    # Replaces the already declared column that has the same name.
    # Raises ArgumentError when no such column exists.
    def swap h
      col = Column.new(h.merge(default_format: @default_format))
      if i = @columns.index {|ch| ch.name == col.name}
        @columns[i] = col
      else
        raise ArgumentError, "#{col.name} ColumnNotFound"
      end
    end

    # A single column of the header definition.
    class Column
      include TableHeaderPtn
      attr_reader :name
      attr_reader :include_ptn
      attr_reader :no_hash
      attr_reader :children

      attr_accessor :required
      attr_accessor :format

      # hash - its first pair is name => pattern (that pair is removed from
      #        the hash); :format, :default_format and :no_hash are read from
      #        the remaining entries.
      def initialize hash
        @children = []
        @name, @include_ptn = hash.shift
        @format = hash[:format] || hash[:default_format]
        @no_hash = hash[:no_hash]
      end

      # Adds a child column without a pattern of its own and flags this
      # column as +no_hash+. Returns the list of children.
      def add name, hash
        @no_hash = true
        ch = Column.new( { name => nil, }.merge(hash) )
        @children << ch
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Docp
  # Pattern-matching helpers mixed into header and column definitions.
  module TableHeaderPtn
    attr_accessor :include_ptn, :exclude_ptn
    attr_accessor :after_check_val

    # Tests one element against +ptn+.
    #
    # The text compared is after_check_val.call(elem) when that callable is
    # set, otherwise elem.text with whitespace and control characters removed.
    # +ptn+ may be a Regexp, a String, or a (nested) Array of both.
    #
    # Returns the match position for a bare Regexp, the first matching entry
    # for anything else, and nil when nothing matches.
    def check_ptn elem, ptn
      text = if after_check_val
               after_check_val.call(elem)
             else
               elem.text.del_space
             end
      return text =~ ptn if ptn.is_a?(Regexp)

      [ptn].flatten.find do |candidate|
        candidate.is_a?(Regexp) ? text =~ candidate : text == candidate
      end
    end

    # Returns the first of +elems+ (a single element or a collection) that
    # satisfies check_ptn, or nil.
    def ptn? elems, ptn
      [elems].flatten.find {|elem| check_ptn(elem, ptn) }
    end

    # Returns the element matching exclude_ptn; nil when there is no match or
    # no exclude pattern is set.
    def exclude_ptn? node
      return unless @exclude_ptn
      ptn?(node, @exclude_ptn)
    end

    # Returns the element matching include_ptn, or nil.
    def include_ptn? node
      ptn?(node, @include_ptn)
    end
  end
end
@@ -0,0 +1,79 @@
1
module Docp
  # Clean-up and span-merging helpers that operate on Nokogiri nodes.
  module TableRemoveMethods
    # Removes every element below +remove_doc+ whose inline style contains
    # "display:none", then strips all attributes from the tr/th/td elements
    # below it.
    #
    # The XPath is relative (".//") so only the node handed in is scanned; the
    # previous absolute "//" form walked the whole owning document on every
    # call.
    def doc_remove_attributes(remove_doc)
      hidden = ".//*[contains(@style,'display:none')]"
      remove_doc.search(hidden).remove
      remove_doc.search('tr', 'th', 'td').each do|row|
        row.attributes.each_key do|k|
          row.delete(k)
        end
      end
    end

    # For every cell that has a +colspan+ attribute and a following row:
    # counting down from index colspan-1 to 0, each cell at that index in
    # the following row is cloned, given the text "<spanning text> <cell text>",
    # and inserted right after the spanning cell. The originals are then
    # removed from the following row and the spanning cell itself is removed.
    def colspan_join parse_doc
      parse_doc.search('tr').each {|tr|
        next_tr = tr.next_element
        tr.elements.each do|td|
          next if td[:colspan].nil? || next_tr.nil?
          col_depth = td[:colspan].to_i - 1
          col_depth.downto(0).map do|xx|
            next if next_tr.elements[xx].nil?
            td.next = next_tr.elements[xx].clone.tap {|e|
              e.content = td.text + " " + next_tr.elements[xx].text
            }
            next_tr.elements[xx]
          end.compact.map(&:remove)
          td.remove
        end
      }
    end

    # Folds the rows covered by a +rowspan+ into the row that declares it.
    #
    # The depth is taken from the last cell in the row that has a rowspan
    # (rowspan - 1). That many times, the cells of the following row are
    # merged by position into this row's cells WITHOUT a rowspan: where such a
    # cell exists its text gets " <text>" appended, otherwise the cell is moved
    # into this row. The following row is then removed.
    def rowspan_join parse_doc
      parse_doc.search('tr').each {|tr|
        row_depth = 0
        no_rowspans = []
        tr.elements.each do|td|
          if td[:rowspan]
            row_depth = td[:rowspan].to_i-1
          else
            no_rowspans << td
          end
        end

        if row_depth > 0
          row_depth.times do
            if tr.next_element
              tr.next_element.elements.each_with_index do|td, i|
                if no_rowspans[i]
                  no_rowspans[i].content = "#{no_rowspans[i].text} #{td.text}"
                else
                  tr.add_child td
                end
              end
              tr.next_element.remove
            end
          end
        end
      }
    end

    # For every row containing a +rowspan+: takes the largest rowspan - 1 and,
    # that many times, moves the cells of the following row to the end of this
    # row and removes the following row.
    def rowspan_flatten parse_doc
      parse_doc.search('tr').each {|tr|
        row_depth = tr.elements.map {|td| td[:rowspan].to_i - 1 if td[:rowspan]}.compact.sort[-1]
        next if row_depth.nil?
        row_depth.times do
          if tr.next_element
            tr.add_child tr.next_element.elements
            tr.next_element.remove
          end
        end
      }
    end
  end
end
@@ -0,0 +1,73 @@
1
module Docp
  # One table row: the cells that carry a column class, plus the formatted
  # value of every column declared on the header definition.
  class TableRow
    extend Forwardable
    include Enumerable
    def_delegators :@row, :empty?, :any?, :count
    def_delegators :@tr, :at, :search, :elements, :row_elements
    attr_reader :tr
    attr_reader :formats

    # tr            - the row node.
    # header_parser - header definition supplying columns, no_hash_keys and
    #                 after_to_hash.
    #
    # Every column (and each of its children) gets an entry in #formats. On a
    # row whose class is "table-header" the cell text is used regardless of
    # the column's own format.
    def initialize tr, header_parser
      @tr = tr
      @row = row_elements.select {|cell| cell[:class] }
      @no_hash_keys = header_parser.no_hash_keys
      @after_to_hash = header_parser.after_to_hash
      @formats = {}
      header_parser.columns.each do |column|
        [column, column.children].flatten.each do |entry|
          formatter = @tr[:class] == "table-header" ? :text : entry.format
          @formats[entry.name] = format(self[entry.name], formatter)
        end
      end
    end

    # Looks a cell up by column name (Symbol or String, matched against the
    # cell's comma-separated class list) or by position among the classed
    # cells. Returns a new empty <td> element when nothing is found.
    def [] name
      found =
        if name.is_a?(Symbol) || name.is_a?(String)
          key = name.to_s
          @row.find {|cell| cell[:class] == key || cell[:class].split(',').include?(key) }
        else
          @row[name]
        end
      return found if found

      doc = Nokogiri::HTML::DocumentFragment.parse ""
      Nokogiri::XML::Element.new "td", doc
    end

    # Applies +format+ to the cell +td+.
    #
    # Symbol - sent to the cell as a method name.
    # Proc   - called with one argument per declared parameter; a parameter
    #          named +row+ receives this TableRow, any other parameter name is
    #          evaluated as Ruby code in this method's scope (for example +td+
    #          or +formats+). A proc that declares +formats+ is not called
    #          here: a lambda is returned and #to_hash calls it later.
    # other  - returned unchanged.
    #
    # NOTE(review): parameter names go through eval; they come from the proc's
    # own definition, so format procs must not be built from untrusted input.
    def format td, format
      case format
      when Symbol
        td.send(format)
      when Proc
        par = format.parameters.map(&:last)
        if par.include?(:formats)
          -> { format.call(*par.map {|name| name == :row ? self : eval(name.to_s) }) }
        else
          format.call(*par.map {|name| name == :row ? self : eval(name.to_s) })
        end
      else
        format
      end
    end

    # Yields every cell that carries a class.
    def each
      @row.each {|cell| yield cell }
    end

    # Returns a Hash of column name => formatted value, leaving out the
    # no_hash keys and calling any deferred (lambda) values. after_to_hash,
    # when set, is called with (hash, self) before the hash is returned.
    def to_hash
      ret = @formats.each_with_object({}) do |(key, value), hash|
        next if @no_hash_keys.include?(key)
        hash[key] = value.is_a?(Proc) ? value.call : value
      end
      @after_to_hash.call(ret, self) if @after_to_hash
      ret
    end
  end
end
@@ -0,0 +1,3 @@
1
module Docp
  # Version of the gem; read by docp.gemspec. Frozen so the shared constant
  # cannot be modified in place.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,161 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: docp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - akiaki0
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-05-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.11'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '5.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '5.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: minitest-reporters
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 1.0.5
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 1.0.5
83
+ - !ruby/object:Gem::Dependency
84
+ name: mini_backtrace
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 0.1.3
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 0.1.3
97
+ - !ruby/object:Gem::Dependency
98
+ name: guard-minitest
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '='
102
+ - !ruby/object:Gem::Version
103
+ version: 2.3.1
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '='
109
+ - !ruby/object:Gem::Version
110
+ version: 2.3.1
111
+ description: html table parse gem
112
+ email:
113
+ - akiaki0pon@gmail.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".gitignore"
119
+ - ".travis.yml"
120
+ - CODE_OF_CONDUCT.md
121
+ - Gemfile
122
+ - Guardfile
123
+ - LICENSE.txt
124
+ - README.md
125
+ - Rakefile
126
+ - bin/console
127
+ - bin/setup
128
+ - docp.gemspec
129
+ - lib/docp.rb
130
+ - lib/docp/table.rb
131
+ - lib/docp/table_doc.rb
132
+ - lib/docp/table_header.rb
133
+ - lib/docp/table_header_ptn.rb
134
+ - lib/docp/table_remove_methods.rb
135
+ - lib/docp/table_row.rb
136
+ - lib/docp/version.rb
137
+ homepage: https://github.com/akiaki0/docp
138
+ licenses:
139
+ - MIT
140
+ metadata: {}
141
+ post_install_message:
142
+ rdoc_options: []
143
+ require_paths:
144
+ - lib
145
+ required_ruby_version: !ruby/object:Gem::Requirement
146
+ requirements:
147
+ - - ">="
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
150
+ required_rubygems_version: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ version: '0'
155
+ requirements: []
156
+ rubyforge_project:
157
+ rubygems_version: 2.5.1
158
+ signing_key:
159
+ specification_version: 4
160
+ summary: html table parse gem
161
+ test_files: []