docp 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +4 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/Guardfile +17 -0
- data/LICENSE.txt +21 -0
- data/README.md +41 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/docp.gemspec +37 -0
- data/lib/docp.rb +15 -0
- data/lib/docp/table.rb +183 -0
- data/lib/docp/table_doc.rb +41 -0
- data/lib/docp/table_header.rb +134 -0
- data/lib/docp/table_header_ptn.rb +32 -0
- data/lib/docp/table_remove_methods.rb +79 -0
- data/lib/docp/table_row.rb +73 -0
- data/lib/docp/version.rb +3 -0
- metadata +161 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 81e87ebf53a7083344fd962f72d6c4c042b9c39a
|
4
|
+
data.tar.gz: f19853fb32210c0dead362e9b402da31907ef771
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 25730126b620964b8013a945fb120d7ee6bd64c0358e418c2972680af5d378fd3e0ca09b771e90dc03d5151ed286c2a2d76fbbdaebf68e868ba800bc07a33ec7
|
7
|
+
data.tar.gz: f6c7c10ce504e706e210db8f2db43341ef7554199e75dd78d0bf84d1184b5dfeac9fe0ce6a61a64dcb1006a257ad954edb04c16977a7103be66583789251c808
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# Contributor Code of Conduct
|
2
|
+
|
3
|
+
As contributors and maintainers of this project, and in the interest of
|
4
|
+
fostering an open and welcoming community, we pledge to respect all people who
|
5
|
+
contribute through reporting issues, posting feature requests, updating
|
6
|
+
documentation, submitting pull requests or patches, and other activities.
|
7
|
+
|
8
|
+
We are committed to making participation in this project a harassment-free
|
9
|
+
experience for everyone, regardless of level of experience, gender, gender
|
10
|
+
identity and expression, sexual orientation, disability, personal appearance,
|
11
|
+
body size, race, ethnicity, age, religion, or nationality.
|
12
|
+
|
13
|
+
Examples of unacceptable behavior by participants include:
|
14
|
+
|
15
|
+
* The use of sexualized language or imagery
|
16
|
+
* Personal attacks
|
17
|
+
* Trolling or insulting/derogatory comments
|
18
|
+
* Public or private harassment
|
19
|
+
* Publishing other's private information, such as physical or electronic
|
20
|
+
addresses, without explicit permission
|
21
|
+
* Other unethical or unprofessional conduct
|
22
|
+
|
23
|
+
Project maintainers have the right and responsibility to remove, edit, or
|
24
|
+
reject comments, commits, code, wiki edits, issues, and other contributions
|
25
|
+
that are not aligned to this Code of Conduct, or to ban temporarily or
|
26
|
+
permanently any contributor for other behaviors that they deem inappropriate,
|
27
|
+
threatening, offensive, or harmful.
|
28
|
+
|
29
|
+
By adopting this Code of Conduct, project maintainers commit themselves to
|
30
|
+
fairly and consistently applying these principles to every aspect of managing
|
31
|
+
this project. Project maintainers who do not follow or enforce the Code of
|
32
|
+
Conduct may be permanently removed from the project team.
|
33
|
+
|
34
|
+
This code of conduct applies both within project spaces and in public spaces
|
35
|
+
when an individual is representing the project or its community.
|
36
|
+
|
37
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
38
|
+
reported by contacting a project maintainer at tarou1y@gmail.com. All
|
39
|
+
complaints will be reviewed and investigated and will result in a response that
|
40
|
+
is deemed necessary and appropriate to the circumstances. Maintainers are
|
41
|
+
obligated to maintain confidentiality with regard to the reporter of an
|
42
|
+
incident.
|
43
|
+
|
44
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
45
|
+
version 1.3.0, available at
|
46
|
+
[http://contributor-covenant.org/version/1/3/0/][version]
|
47
|
+
|
48
|
+
[homepage]: http://contributor-covenant.org
|
49
|
+
[version]: http://contributor-covenant.org/version/1/3/0/
|
data/Gemfile
ADDED
data/Guardfile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
base = File.expand_path("../", __FILE__)
|
2
|
+
|
3
|
+
# Guard configuration: decides which minitest files run when a file changes.
guard :minitest do
  # Unit tests: a change under test/ maps to "test/<first capture>_test.rb".
  watch(%r{^test/(.*)/?(.*)_test\.rb$}) { |match| "test/#{match[1]}_test.rb" }
  # A change to lib/docp/<name>.rb maps to test/<name>_test.rb.
  watch(%r{^lib/docp/(.*)\.rb$}) { |match| "test/#{match[1]}_test.rb" }

  # Integration tests: same mapping, keyed on the path segment before /integration.
  watch(%r{^test/(.*)/integration/?(.*)_test\.rb$}) { |match| "test/#{match[1]}_test.rb" }
  # Any library change also triggers everything under test/integration.
  watch(%r{^lib/docp/(.*)\.rb$}) { integration_tests }
end
|
10
|
+
|
11
|
+
# Lists integration test paths relative to the current working directory.
#
# resource - :all (default) to list every entry under test/integration,
#            otherwise a name used as the "<resource>_*.rb" file prefix.
#
# Returns an Array of path Strings as produced by Dir[].
def integration_tests(resource = :all)
  pattern = resource == :all ? "test/integration/*" : "test/integration/#{resource}_*.rb"
  Dir[pattern]
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 dalks
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Docp
|
2
|
+
|
3
|
+
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/docp`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
|
+
|
5
|
+
TODO: Delete this and the text above, and describe your gem
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'docp'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install docp
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
TODO: Write usage instructions here
|
26
|
+
|
27
|
+
## Development
|
28
|
+
|
29
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
+
|
31
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/akiaki0/docp. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
36
|
+
|
37
|
+
|
38
|
+
## License
|
39
|
+
|
40
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
41
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "docp"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/docp.gemspec
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'docp/version'
|
5
|
+
|
6
|
+
# Gem specification for docp.
Gem::Specification.new do |spec|
  spec.name = "docp"
  spec.version = Docp::VERSION
  spec.authors = ["akiaki0"]
  spec.email = ["akiaki0pon@gmail.com"]

  spec.summary = %q{html table parse gem}
  spec.description = %q{html table parse gem}
  spec.homepage = "https://github.com/akiaki0/docp"
  spec.license = "MIT"

  # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
  # delete this section to allow pushing this gem to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  # end

  # Package every git-tracked file except the test suites.
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # lib/docp.rb does `require 'nokogiri'`, so nokogiri must be a runtime
  # dependency; as a development-only dependency an installed gem would fail
  # with LoadError on `require "docp"`.
  spec.add_dependency "nokogiri"

  spec.add_development_dependency "bundler", "~> 1.11"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
  spec.add_development_dependency 'minitest-reporters', '1.0.5'
  spec.add_development_dependency 'mini_backtrace', '0.1.3'
  spec.add_development_dependency 'guard-minitest', '2.3.1'
end
|
data/lib/docp.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
require 'nokogiri'
|
3
|
+
require "docp/version"
|
4
|
+
require "docp/table_header"
|
5
|
+
require "docp/table"
|
6
|
+
|
7
|
+
# Core extension used by the header pattern matching code.
class String
  # Returns a copy of the string with every whitespace character and every
  # control character removed.
  def del_space
    gsub(/[[:space:]]|[[:cntrl:]]/, '')
  end
end
|
12
|
+
|
13
|
+
# Top-level namespace of the docp gem (an HTML table parsing library).
# The classes themselves live in the files under lib/docp/.
module Docp
end
|
data/lib/docp/table.rb
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
require 'docp/table_doc'
|
2
|
+
require 'docp/table_row'
|
3
|
+
require 'docp/table_remove_methods'
|
4
|
+
|
5
|
+
module Nokogiri
  # Extension of Nokogiri elements used throughout Docp.
  class XML::Element
    # Collects the table cells found below this element.
    #
    # Returns an Array containing every element matched by search('*') whose
    # tag name is "td" or "th", in the order search returned them.
    def row_elements
      cell_tags = %w[td th]
      search('*').select { |node| cell_tags.include?(node.name) }
    end
  end
end
|
13
|
+
|
14
|
+
module Docp
  # One parsed HTML table.
  #
  # A Table copies the rows of a source <table> (from the detected header row
  # onwards) into a fresh <table> element, tags every cell with the name(s) of
  # the column(s) it belongs to via the "class" attribute (comma separated),
  # and exposes the data rows as Docp::TableRow objects.
  class Table
    class << self
      # Parses every matching table of +parse_doc+ into a new TableDoc.
      #
      # parse_doc     - HTML String, Nokogiri document, or an object exposing
      #                 the HTML via #parser (see .find).
      # header_parser - TableHeader describing the columns to look for.
      # block         - optional; called with each Table as it is created.
      #
      # Returns the TableDoc.
      def parse(parse_doc, header_parser, &block)
        TableDoc.new.parse(parse_doc, header_parser, &block)
      end

      # Locates tables whose header row matches +header_parser+.
      #
      # The source is re-parsed into a private Nokogiri document, so the
      # caller's document is never modified. Tables containing a nested
      # <table> are skipped. For each remaining table the rows are scanned in
      # order: scanning stops at the first row matching the exclude pattern,
      # and the first row that matches the include patterns and contains all
      # required columns is yielded as the header.
      #
      # Yields the table element, the header <tr> and the header row index.
      def find(src_doc, header_parser)
        header_parser = TableHeader.new(nil, header_parser) if header_parser.is_a?(Hash)
        src_doc = Nokogiri::HTML(src_doc) if src_doc.is_a?(String)
        html = src_doc.respond_to?(:to_html) ? src_doc.to_html : src_doc.parser.to_html
        parse_doc = Nokogiri::HTML(html)
        parse_doc.search('//table').each do |table|
          # at('table') looks at descendants only, i.e. detects nested tables.
          next if table.at('table')

          table.search('tr').each_with_index do |tr, header_index|
            break if header_parser.exclude_ptn?(tr)
            next unless header_parser.include_ptn?(tr)
            next unless header_parser.required_all?(tr)

            yield table, tr, header_index
            break
          end
        end
      end

      # Placeholder kept for interface compatibility; it has no implementation
      # and always returns nil.
      def header_required_all?(header_tr, header_parser)
      end
    end

    extend TableRemoveMethods
    extend Forwardable
    include TableRemoveMethods
    include Enumerable

    class HeaderCountNotMatchError < StandardError; end
    class RequiredAttributesUndefined < StandardError; end

    def_delegators :@this_table, :at, :search, :elements, :row_elements

    attr_reader :doc
    # Never assigned inside this class, so it currently always returns nil.
    attr_reader :header_required_undefineds

    # doc           - Nokogiri document/fragment that will own the new table.
    # header_parser - TableHeader (its #child, when present, takes precedence).
    #                 A Hash is wrapped in a TableHeader the same way .find
    #                 does, instead of failing on Hash#child.
    # table         - source <table> element.
    # header_tr     - header <tr> inside +table+.
    # header_index  - index of +header_tr+ among the table's <tr> elements.
    def initialize(doc, header_parser, table, header_tr, header_index)
      header_parser = TableHeader.new(nil, header_parser) if header_parser.is_a?(Hash)
      @doc = doc
      @this_table = Nokogiri::XML::Element.new("table", @doc)
      @header_parser = header_parser.child || header_parser
      parse_table(table, header_tr, header_index) if @header_parser.columns.any?
    end

    # Builds the internal table from the source table.
    #
    # Runs the optional before_parse hook, strips hidden elements and cell
    # attributes, then either
    # * vertical mode: builds one header row and one data row, pairing every
    #   cell that matches a column with its next sibling element, or
    # * normal mode: moves the rows from the header row onwards into the
    #   internal table and tags header and data cells with column names.
    #
    # Returns self.
    def parse_table(table, header_tr, header_index)
      @header_parser.before_parse.call(table) if @header_parser.before_parse
      doc_remove_attributes(table)
      if @header_parser.vertical
        head_tr = @this_table.add_child Nokogiri::XML::Element.new("tr", @doc)
        row_tr = @this_table.add_child Nokogiri::XML::Element.new("tr", @doc)
        head_tr[:class] = "table-header"
        table.row_elements.each do |td|
          col = @header_parser.columns.find { |c| c.include_ptn?(td) }
          next unless col

          label = td.clone
          label[:class] = col.name
          head_tr.add_child label
          # The value belonging to a label is the element right after it;
          # a label without a following element contributes no value.
          ntd = td.next_element
          next unless ntd

          ntd[:class] = ntd[:class] ? "#{ntd[:class]},#{col.name}" : col.name
          row_tr.add_child ntd.clone
        end
        @doc.add_child(@this_table)
      else
        trs = table.search('tr')[header_index..-1]
        if trs
          header_tr[:class] = "table-header"
          @this_table.add_child trs
          set_header_attributes(header_tr)
          set_row_attributes(header_tr, @this_table.search('tr')[1..-1])
          @doc.add_child(@this_table)
        end
      end
      self
    end

    # Returns the column names (Symbols) found in the "class" attributes of
    # the given cells; cells without a class are ignored.
    def get_row_class_names(tr_elements)
      tr_elements.map { |td|
        next unless td[:class]
        td[:class].split(",").map(&:to_sym)
      }.compact.flatten
    end

    # True when the given cells carry at least as many required column names
    # as the header parser declares required keys.
    def row_required_all?(tr_elements)
      required = @header_parser.required_keys
      present = get_row_class_names(tr_elements).select { |name| required.include?(name) }
      present.count >= required.count
    end

    # Wraps a <tr> element in a TableRow bound to this table's header parser.
    def extend_row(tr)
      TableRow.new(tr, @header_parser)
    end

    # Returns the "error" attribute values recorded on rows by
    # set_row_attributes (rows without an error are omitted).
    def errors
      @this_table.search('tr').map { |tr| tr[:error] }.compact
    end

    # Yields a TableRow for each row that carries all required columns.
    #
    # args - Hash; unless :header is given (non-nil) the header row is skipped.
    #
    # Rows without any tagged cell are always skipped. When :header is
    # given, an element with class "table-header" found inside the row is
    # yielded first; rows without such an element (previously a
    # NoMethodError) are simply yielded on their own.
    def each(args = {})
      @this_table.search('tr').each do |tr|
        header = tr.at('.table-header')
        next if args[:header].nil? && tr[:class] == "table-header"

        cells = tr.row_elements
        next if cells.none? { |td| td[:class] }

        yield extend_row(header) if args[:header] && header
        yield extend_row(tr) if row_required_all?(cells)
      end
    end

    # Returns the header row as a TableRow, or nil when this table has no
    # header row (e.g. nothing was parsed).
    def header
      header_tr = @this_table.at('.table-header')
      header_tr && extend_row(header_tr)
    end

    # Collects the rows produced by #each into an Array, yielding each row
    # to the optional block before it is stored.
    def rows(args = {})
      [].tap do |ret|
        each(args) do |row|
          yield row if block_given?
          ret << row
        end
      end
    end

    alias :rows_each :each
    alias :rows_each_with_index :each_with_index

    # Tags every header cell with the name of each column whose pattern it
    # matches (multiple names are joined with commas).
    def set_header_attributes(tr)
      tr.row_elements.each do |td|
        @header_parser.columns.each do |col|
          next unless col.include_ptn?(td)

          td[:class] = td[:class] ? "#{td[:class]},#{col.name}" : col.name
        end
      end
    end

    # Copies the header cells' column names onto the cells at the same
    # position in every data row and records problems in the row's "error"
    # attribute: a cell-count mismatch first, overwritten by a
    # missing-required-column error when that applies as well.
    def set_row_attributes(header_tr, tr_rows)
      header_cells = header_tr.row_elements
      tr_rows.each do |tr|
        cells = tr.row_elements
        tr[:error] = "#{HeaderCountNotMatchError}" if header_cells.count != cells.count
        header_cells.each_with_index do |h, x|
          next if h[:class].nil? || cells[x].nil?

          cells[x][:class] = h[:class]
        end
        tr[:error] = "#{RequiredAttributesUndefined}" unless row_required_all?(cells)
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Docp
  # Ordered collection of the Docp::Table objects parsed from one source.
  class TableDoc
    include Enumerable

    attr_reader :doc, :tables

    # Creates an empty collection backed by an empty HTML fragment that
    # becomes the owner of every parsed table.
    def initialize
      @doc = Nokogiri::HTML::DocumentFragment.parse ""
      @tables = []
    end

    # Finds every table in +parse_doc+ matching +header_parser+ and appends a
    # Docp::Table for each one.
    #
    # block - optional; called with each new table right after it is added.
    #
    # Returns self.
    def parse(parse_doc, header_parser, &block)
      Docp::Table.find(parse_doc, header_parser) do |table, header_tr, header_index|
        @tables << Docp::Table.new(@doc, header_parser, table, header_tr, header_index)
        block.call(@tables.last) if block
      end
      self
    end

    # Yields each table. Without a block an Enumerator is returned, matching
    # the usual Enumerable convention.
    def each(&block)
      return enum_for(:each) unless block

      @tables.each(&block)
    end

    # Returns the rows of all tables as one flat Array, yielding every row to
    # the optional block after all rows have been collected.
    def rows(&block)
      all_rows = @tables.flat_map(&:rows)
      all_rows.each(&block) if block
      all_rows
    end

    # Returns the non-nil header_required_undefineds values of the tables.
    def header_required_undefineds
      @tables.map(&:header_required_undefineds).compact
    end

    # True when no table has been parsed.
    def empty?
      @tables.empty?
    end

    # With no arguments: true when at least one table has been parsed.
    # Arguments and block are forwarded to Array#any?, so a block is honoured
    # instead of being silently ignored.
    def any?(*args, &block)
      @tables.any?(*args, &block)
    end
  end
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
require 'docp/table_header_ptn'
|
2
|
+
module Docp
  # Describes the header of the tables to extract: the columns to look for,
  # which of them are required, and table/row level options.
  class TableHeader
    include TableHeaderPtn

    attr_reader :columns
    attr_reader :child
    attr_reader :required_attributes

    # Table options
    attr_accessor :before_parse
    attr_accessor :vertical

    # Row options
    attr_accessor :default_format
    attr_accessor :after_to_hash

    alias :required_keys :required_attributes

    # match - match information handed to the block (nil for a top-level
    #         header; a Hash with :ptn, :index and :tr for a child header
    #         created by #include_ptn?).
    # args  - Hash of option name => value; each entry is applied through the
    #         matching "<name>=" writer.
    # block - optional configuration block called with (self, match). It may
    #         be omitted, e.g. when the header is built from an options Hash.
    def initialize(match = nil, args = {}, &block)
      @columns = []
      @required_attributes = []
      args.each { |k, v| send("#{k}=", v) }
      @default_format ||= :text
      block.call(self, match) if block
      set_required_columns
    end

    # Stores the given block as the match block (when one is given) and
    # returns the currently stored match block.
    def match_block(&block)
      @match_block = block if block_given?
      @match_block
    end

    # Tests whether a row matches this header.
    #
    # The patterns tried are @include_ptn when set, otherwise the include
    # patterns of all columns. On the first pattern found among the row's
    # elements: if a match block is registered, a child TableHeader is built
    # from it and the result is whether the child does NOT exclude the row;
    # otherwise the result is true. Returns false when no pattern matches.
    def include_ptn?(tr)
      patterns = [@include_ptn || @columns.map(&:include_ptn)].flatten
      nodes = tr.search('*')
      patterns.each_with_index do |ptn, i|
        next unless ptn?(nodes, ptn)

        match = { ptn: ptn, index: i, tr: tr }
        @child = TableHeader.new(match, &@match_block) if @match_block
        return @child ? !@child.exclude_ptn?(tr.search('*')) : true
      end
      false
    end

    # Returns the column with the given name (String or Symbol), or nil.
    def [](name)
      name = name.to_sym if name.is_a?(String)
      @columns.find { |col| col.name == name }
    end

    # Synchronises each column's +required+ flag with required_attributes.
    # With no required attributes every currently required column is reset.
    def set_required_columns
      if @required_attributes.any?
        @columns.each { |col| col.required = @required_attributes.include?(col.name) }
      else
        required_columns.each { |col| col.required = false }
      end
    end

    # Sets the required column names (a single name or an Array; nils are
    # dropped) and updates the columns' flags.
    def required_attributes=(names)
      @required_attributes = [names].flatten.compact
      set_required_columns
    end

    # Returns the columns currently flagged as required.
    def required_columns
      @columns.select { |col| col.required }
    end

    # Returns true when there are no required columns, or when the number of
    # required columns matched by the row's cells reaches the number of
    # required keys; nil otherwise.
    def required_all?(tr)
      required = required_columns
      return true if required.empty?

      cells = tr.row_elements
      matched = required.select { |col| col.include_ptn?(cells) }
      matched.count >= required_keys.count ? true : nil
    end

    # Names of the columns that are left out of TableRow#to_hash.
    def no_hash_keys
      @columns.select(&:no_hash).map(&:name)
    end

    # Adds a column.
    #
    # h - Hash whose first pair is {name => include pattern}; :format and
    #     :no_hash entries are honoured as options.
    #
    # Yields the new column (to add child columns) and returns it. The
    # column and any children added in the block are appended to #columns.
    def add(h)
      col = Column.new(h.merge(default_format: @default_format))
      yield col if block_given?
      @columns.push(*[col, col.children].flatten)
      col
    end

    # Replaces the existing column of the same name with one built from +h+.
    #
    # Raises ArgumentError when no column with that name exists.
    def swap(h)
      col = Column.new(h.merge(default_format: @default_format))
      if i = @columns.index { |ch| ch.name == col.name }
        @columns[i] = col
      else
        raise ArgumentError, "#{col.name} ColumnNotFound"
      end
    end

    # A single column definition: name, header pattern, and value format.
    class Column
      include TableHeaderPtn

      attr_reader :name
      attr_reader :include_ptn
      attr_reader :no_hash
      attr_reader :children

      attr_accessor :required
      attr_accessor :format

      # hash - first pair is {name => include pattern}; the remaining keys
      #        may include :format, :default_format and :no_hash.
      def initialize(hash)
        # Work on a copy so removing the first pair never alters the
        # caller's hash.
        hash = hash.dup
        @children = []
        @name, @include_ptn = hash.shift
        @format = hash[:format] || hash[:default_format]
        @no_hash = hash[:no_hash]
      end

      # Adds a child column (no include pattern of its own) and marks this
      # column as excluded from hashes. Returns the children Array.
      def add(name, hash)
        @no_hash = true
        ch = Column.new({ name => nil }.merge(hash))
        @children << ch
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Docp
  # Pattern matching helpers shared by TableHeader and its columns.
  #
  # A pattern is a Regexp, a String, or an Array mixing both.
  module TableHeaderPtn
    attr_accessor :include_ptn, :exclude_ptn
    attr_accessor :after_check_val

    # Tests one element against a pattern.
    #
    # The compared text is after_check_val.call(elem) when that hook is set,
    # otherwise the element's text with whitespace/control characters removed.
    #
    # Returns the match position (or nil) for a Regexp pattern; for any other
    # pattern, the first entry of the flattened pattern list that matches
    # (Regexp entries via =~, other entries via ==), or nil.
    def check_ptn(elem, ptn)
      text = after_check_val ? after_check_val.call(elem) : elem.text.del_space
      return text =~ ptn if ptn.is_a?(Regexp)

      [ptn].flatten.find do |candidate|
        candidate.is_a?(Regexp) ? text =~ candidate : text == candidate
      end
    end

    # Returns the first of the given element(s) matching +ptn+, or nil.
    def ptn?(elems, ptn)
      [elems].flatten.find { |el| check_ptn(el, ptn) }
    end

    # Returns the first node matching the exclude pattern; nil when nothing
    # matches or no exclude pattern is set.
    def exclude_ptn?(node)
      return unless @exclude_ptn

      ptn?(node, @exclude_ptn)
    end

    # Returns the first node matching the include pattern, or nil.
    def include_ptn?(node)
      ptn?(node, @include_ptn)
    end
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module Docp
  # Helpers that clean up and normalise table markup before parsing.
  # All methods modify the given Nokogiri node(s) in place.
  module TableRemoveMethods
    # Removes hidden elements and strips cell attributes.
    #
    # Elements below +remove_doc+ whose style attribute contains
    # "display:none" are removed, then every attribute is deleted from the
    # <tr>, <th> and <td> elements found by searching +remove_doc+.
    def doc_remove_attributes(remove_doc)
      # The leading "." keeps the XPath relative to remove_doc; a bare "//*"
      # is evaluated from the document root and would also strip hidden
      # elements from unrelated tables.
      spam = ".//*[contains(@style,'display:none')]"
      remove_doc.search(spam).remove
      remove_doc.search('tr', 'th', 'td').each do |row|
        row.attributes.each_key { |k| row.delete(k) }
      end
    end

    # Merges cells carrying a colspan attribute with cells of the next row.
    #
    # For each such cell, the first +colspan+ cells of the following row are
    # visited from right to left; a copy of each, with its text prefixed by
    # the spanning cell's text and a space, is inserted right after the
    # spanning cell, and the original is removed from the following row.
    # Finally the spanning cell itself is removed. Rows without a following
    # sibling element are left untouched.
    def colspan_join(parse_doc)
      parse_doc.search('tr').each do |tr|
        next_tr = tr.next_element
        tr.elements.each do |td|
          next if td[:colspan].nil? || next_tr.nil?

          col_depth = td[:colspan].to_i - 1
          merged = col_depth.downto(0).map do |xx|
            below = next_tr.elements[xx]
            next if below.nil?

            td.next = below.clone.tap { |e| e.content = td.text + " " + below.text }
            below
          end
          merged.compact.map(&:remove)
          td.remove
        end
      end
    end

    # Folds the rows covered by a rowspan into the row that declares it.
    #
    # The depth used is (rowspan - 1) of the last cell in the row that has a
    # rowspan attribute. That many times, the cells of the next row are
    # merged by position into this row's cells WITHOUT rowspan (texts joined
    # with a space); surplus cells are appended to this row; then the next
    # row is removed.
    def rowspan_join(parse_doc)
      parse_doc.search('tr').each do |tr|
        row_depth = 0
        no_rowspans = []
        tr.elements.each do |td|
          if td[:rowspan]
            row_depth = td[:rowspan].to_i - 1
          else
            no_rowspans << td
          end
        end
        next unless row_depth > 0

        row_depth.times do
          following = tr.next_element
          next unless following

          following.elements.each_with_index do |td, i|
            if no_rowspans[i]
              no_rowspans[i].content = "#{no_rowspans[i].text} #{td.text}"
            else
              tr.add_child td
            end
          end
          following.remove
        end
      end
    end

    # Flattens rowspans by appending the cells of the following rows.
    #
    # For a row whose largest rowspan is n, the cells of each of the next
    # n - 1 rows are moved to the end of this row and the emptied row is
    # removed. Rows without any rowspan are left untouched.
    def rowspan_flatten(parse_doc)
      parse_doc.search('tr').each do |tr|
        row_depth = tr.elements.map { |td| td[:rowspan].to_i - 1 if td[:rowspan] }.compact.max
        next if row_depth.nil?

        row_depth.times do
          if tr.next_element
            tr.add_child tr.next_element.elements
            tr.next_element.remove
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Docp
  # One table row: the tagged cells of a <tr> plus their formatted values.
  class TableRow
    extend Forwardable
    include Enumerable

    def_delegators :@row, :empty?, :any?, :count
    def_delegators :@tr, :at, :search, :elements, :row_elements

    attr_reader :tr
    attr_reader :formats

    # tr            - the row element; its cells are read via #row_elements.
    # header_parser - supplies the columns, no_hash_keys and after_to_hash.
    #
    # Only cells with a "class" attribute are kept. For every column (and
    # child column) a formatted value is stored in #formats; a header row
    # (class "table-header") is always formatted with :text.
    def initialize(tr, header_parser)
      @tr = tr
      @row = row_elements.select { |td| td[:class] }
      @no_hash_keys = header_parser.no_hash_keys
      @after_to_hash = header_parser.after_to_hash
      @formats = {}
      header_row = @tr[:class] == "table-header"
      header_parser.columns.each do |col|
        [col, col.children].flatten.each do |ch|
          @formats[ch.name] = format(self[ch.name], header_row ? :text : ch.format)
        end
      end
    end

    # Looks up a cell.
    #
    # name - a column name (Symbol or String), matched against the cell's
    #        comma separated class list, or any other value, which is used
    #        to index the Array of tagged cells.
    #
    # Returns the cell, or a new empty <td> element when nothing is found.
    def [](name)
      found =
        case name
        when Symbol, String
          key = name.is_a?(Symbol) ? name.to_s : name
          @row.find { |cell| cell[:class] == key || cell[:class].split(',').include?(key) }
        else
          @row[name]
        end
      return found if found

      doc = Nokogiri::HTML::DocumentFragment.parse ""
      Nokogiri::XML::Element.new "td", doc
    end

    # Applies a column format to a cell.
    #
    # format - Symbol: the method of that name is sent to the cell.
    #          Proc:   called with one argument per declared parameter,
    #                  resolved by parameter name (:row is this row; other
    #                  names are evaluated in this method's scope, e.g. td,
    #                  formats). If a parameter is named :formats, the call
    #                  is deferred: a lambda is returned and invoked later
    #                  by #to_hash.
    #          Otherwise the value itself is returned.
    def format(td, format)
      case format
      when Symbol
        td.send(format)
      when Proc
        par = format.parameters.map(&:last)
        # NOTE: eval resolves the proc's own parameter names; the procs are
        # supplied by the code configuring the header.
        call_format = -> { format.call(*par.map { |name| name == :row ? self : eval(name.to_s) }) }
        par.include?(:formats) ? call_format : call_format.call
      else
        format
      end
    end

    # Yields each tagged cell.
    def each
      @row.each { |cell| yield cell }
    end

    # Returns a Hash of column name => formatted value, skipping the
    # no_hash keys and resolving deferred (Proc) values. The after_to_hash
    # hook, when set, is called with the hash and this row before returning.
    def to_hash
      ret = @formats.each_with_object({}) do |(key, value), acc|
        next if @no_hash_keys.include?(key)

        acc[key] = value.is_a?(Proc) ? value.call : value
      end
      @after_to_hash.call(ret, self) if @after_to_hash
      ret
    end
  end
end
|
data/lib/docp/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: docp
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- akiaki0
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-05-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.11'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.11'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: nokogiri
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: minitest-reporters
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 1.0.5
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.0.5
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: mini_backtrace
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.1.3
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.1.3
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: guard-minitest
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 2.3.1
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 2.3.1
|
111
|
+
description: html table parse gem
|
112
|
+
email:
|
113
|
+
- akiaki0pon@gmail.com
|
114
|
+
executables: []
|
115
|
+
extensions: []
|
116
|
+
extra_rdoc_files: []
|
117
|
+
files:
|
118
|
+
- ".gitignore"
|
119
|
+
- ".travis.yml"
|
120
|
+
- CODE_OF_CONDUCT.md
|
121
|
+
- Gemfile
|
122
|
+
- Guardfile
|
123
|
+
- LICENSE.txt
|
124
|
+
- README.md
|
125
|
+
- Rakefile
|
126
|
+
- bin/console
|
127
|
+
- bin/setup
|
128
|
+
- docp.gemspec
|
129
|
+
- lib/docp.rb
|
130
|
+
- lib/docp/table.rb
|
131
|
+
- lib/docp/table_doc.rb
|
132
|
+
- lib/docp/table_header.rb
|
133
|
+
- lib/docp/table_header_ptn.rb
|
134
|
+
- lib/docp/table_remove_methods.rb
|
135
|
+
- lib/docp/table_row.rb
|
136
|
+
- lib/docp/version.rb
|
137
|
+
homepage: https://github.com/akiaki0/docp
|
138
|
+
licenses:
|
139
|
+
- MIT
|
140
|
+
metadata: {}
|
141
|
+
post_install_message:
|
142
|
+
rdoc_options: []
|
143
|
+
require_paths:
|
144
|
+
- lib
|
145
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
146
|
+
requirements:
|
147
|
+
- - ">="
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: '0'
|
150
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
|
+
requirements:
|
152
|
+
- - ">="
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: '0'
|
155
|
+
requirements: []
|
156
|
+
rubyforge_project:
|
157
|
+
rubygems_version: 2.5.1
|
158
|
+
signing_key:
|
159
|
+
specification_version: 4
|
160
|
+
summary: html table parse gem
|
161
|
+
test_files: []
|