yasuri 2.0.12 → 3.3.0

This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -11,8 +11,7 @@ describe 'Yasuri' do
11
11
 
12
12
  describe '::StructNode' do
13
13
  before do
14
- @agent = Mechanize.new
15
- @page = @agent.get(uri + "/struct/structual_text.html")
14
+ @uri = uri + "/struct/structual_text.html"
16
15
 
17
16
  @table_1996 = [
18
17
  { "title" => "The Perfect Insider",
@@ -53,7 +52,7 @@ describe 'Yasuri' do
53
52
  Yasuri::TextNode.new('./td[2]', "pub_date"),
54
53
  ])
55
54
  expected = @table_1996
56
- actual = node.inject(@agent, @page)
55
+ actual = node.scrape(@uri)
57
56
  expect(actual).to match expected
58
57
  end
59
58
 
@@ -63,7 +62,7 @@ describe 'Yasuri' do
63
62
  Yasuri::TextNode.new('./td[2]', "pub_date"),
64
63
  ])
65
64
  expected = @table_1996.first
66
- actual = node.inject(@agent, @page)
65
+ actual = node.scrape(@uri)
67
66
  expect(actual).to match expected
68
67
  end
69
68
 
@@ -72,7 +71,7 @@ describe 'Yasuri' do
72
71
  node = Yasuri::StructNode.new(no_match_xpath, "table", [
73
72
  Yasuri::TextNode.new('./td[1]', "title")
74
73
  ])
75
- actual = node.inject(@agent, @page)
74
+ actual = node.scrape(@uri)
76
75
  expect(actual).to be_empty
77
76
  end
78
77
 
@@ -81,7 +80,7 @@ describe 'Yasuri' do
81
80
  node = Yasuri::StructNode.new(invalid_xpath, "table", [
82
81
  Yasuri::TextNode.new('./td[1]', "title")
83
82
  ])
84
- expect { node.inject(@agent, @page) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
83
+ expect { node.scrape(@uri) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
85
84
  end
86
85
 
87
86
  it 'fail with invalid xpath in children' do
@@ -90,7 +89,7 @@ describe 'Yasuri' do
90
89
  Yasuri::TextNode.new(invalid_xpath, "title"),
91
90
  Yasuri::TextNode.new('./td[2]', "pub_date"),
92
91
  ])
93
- expect { node.inject(@agent, @page) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
92
+ expect { node.scrape(@uri) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
94
93
  end
95
94
 
96
95
  it 'scrape all tables' do
@@ -101,7 +100,7 @@ describe 'Yasuri' do
101
100
  ])
102
101
  ])
103
102
  expected = @all_tables
104
- actual = node.inject(@agent, @page)
103
+ actual = node.scrape(@uri)
105
104
  expect(actual).to match expected
106
105
  end
107
106
 
@@ -118,7 +117,7 @@ describe 'Yasuri' do
118
117
  Yasuri::TextNode.new('./td[2]', "pub_date"),
119
118
  ])
120
119
  ])
121
- compare_generated_vs_original(generated, original, @page)
120
+ compare_generated_vs_original(generated, original, @uri)
122
121
  end
123
122
 
124
123
  it 'return child node as symbol' do
@@ -127,7 +126,7 @@ describe 'Yasuri' do
127
126
  Yasuri::TextNode.new('./td[2]', "pub_date"),
128
127
  ])
129
128
  expected = @table_1996.map{|h| h.map{|k,v| [k.to_sym, v] }.to_h }
130
- actual = node.inject(@agent, @page, symbolize_names:true)
129
+ actual = node.scrape(@uri, symbolize_names:true)
131
130
  expect(actual).to match expected
132
131
  end
133
132
 
@@ -135,9 +134,7 @@ describe 'Yasuri' do
135
134
 
136
135
  describe '::StructNode::Links' do
137
136
  before do
138
- @agent = Mechanize.new
139
- @page = @agent.get(uri + "/struct/structual_links.html")
140
-
137
+ @uri = uri + "/struct/structual_links.html"
141
138
  @table = [
142
139
  { "title" => "Child01,02",
143
140
  "child" => [{"p" => "Child 01 page."}, {"p" => "Child 02 page."}] },
@@ -155,22 +152,21 @@ describe 'Yasuri' do
155
152
  ])
156
153
  ])
157
154
  expected = @table
158
- actual = node.inject(@agent, @page)
155
+ actual = node.scrape(@uri)
159
156
  expect(actual).to match expected
160
157
  end
161
158
  end # descrive
162
159
 
163
160
  describe '::StructNode::Pages' do
164
161
  before do
165
- @agent = Mechanize.new
166
- @page = @agent.get(uri + "/struct/structual_text.html") #dummy
162
+ @uri = uri + "/struct/structual_text.html"
167
163
  end
168
164
 
169
165
  it 'not supported' do
170
166
  node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
171
167
  Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "pages", [])
172
168
  ])
173
- expect{ node.inject(@agent, @page) }.to raise_error(NotImplementedError, "PagenateNode inside StructNode, Not Supported")
169
+ expect{ node.scrape(@uri) }.to raise_error(NotImplementedError, "PagenateNode inside StructNode, Not Supported")
174
170
  end
175
171
  end
176
172
  end
@@ -10,69 +10,68 @@ describe 'Yasuri' do
10
10
  include_context 'httpserver'
11
11
 
12
12
  before do
13
- @agent = Mechanize.new
14
- @index_page = @agent.get(uri)
13
+ @uri = uri
15
14
  end
16
15
 
17
16
  describe '::TextNode' do
18
17
  before { @node = Yasuri::TextNode.new('/html/body/p[1]', "title") }
19
18
 
20
19
  it 'scrape text text <p>Hello,Yasuri</p>' do
21
- actual = @node.inject(@agent, @index_page)
20
+ actual = @node.scrape(@uri)
22
21
  expect(actual).to eq "Hello,Yasuri"
23
22
  end
24
23
 
25
24
  it 'return empty text if no match node' do
26
25
  no_match_node = Yasuri::TextNode.new('/html/body/no_match_node', "title")
27
- actual = no_match_node.inject(@agent, @index_page)
26
+ actual = no_match_node.scrape(@uri)
28
27
  expect(actual).to be_empty
29
28
  end
30
29
 
31
30
  it 'fail with invalid xpath' do
32
31
  invalid_xpath = '/html/body/no_match_node['
33
32
  node = Yasuri::TextNode.new(invalid_xpath, "title")
34
- expect { node.inject(@agent, @index_page) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
33
+ expect { node.scrape(@uri) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
35
34
  end
36
35
 
37
36
  it "can be defined by DSL, return single TextNode title" do
38
37
  generated = Yasuri.text_title '/html/body/p[1]'
39
38
  original = Yasuri::TextNode.new('/html/body/p[1]', "title")
40
- compare_generated_vs_original(generated, original, @index_page)
39
+ compare_generated_vs_original(generated, original, @uri)
41
40
  end
42
41
 
43
42
  it "can be truncated with regexp" do
44
43
  node = Yasuri.text_title '/html/body/p[1]', truncate:/^[^,]+/
45
- actual = node.inject(@agent, @index_page)
44
+ actual = node.scrape(@uri)
46
45
  expect(actual).to eq "Hello"
47
46
  end
48
47
 
49
48
  it "return first captured if matched given capture pattern" do
50
49
  node = Yasuri.text_title '/html/body/p[1]', truncate:/H(.+)i/
51
- actual = node.inject(@agent, @index_page)
50
+ actual = node.scrape(@uri)
52
51
  expect(actual).to eq "ello,Yasur"
53
52
  end
54
53
 
55
54
  it "can be truncated with regexp" do
56
55
  node = Yasuri.text_title '/html/body/p[1]', truncate:/[^,]+$/
57
- actual = node.inject(@agent, @index_page)
56
+ actual = node.scrape(@uri)
58
57
  expect(actual).to eq "Yasuri"
59
58
  end
60
59
 
61
60
  it "return empty string if truncated with no match to regexp" do
62
61
  node = Yasuri.text_title '/html/body/p[1]', truncate:/^hoge/
63
- actual = node.inject(@agent, @index_page)
62
+ actual = node.scrape(@uri)
64
63
  expect(actual).to be_empty
65
64
  end
66
65
 
67
66
  it "return symbol method applied string" do
68
67
  node = Yasuri.text_title '/html/body/p[1]', proc: :upcase
69
- actual = node.inject(@agent, @index_page)
68
+ actual = node.scrape(@uri)
70
69
  expect(actual).to eq "HELLO,YASURI"
71
70
  end
72
71
 
73
72
  it "return apply multi arguments" do
74
73
  node = Yasuri.text_title '/html/body/p[1]', proc: :upcase, truncate:/H(.+)i/
75
- actual = node.inject(@agent, @index_page)
74
+ actual = node.scrape(@uri)
76
75
  expect(actual).to eq "ELLO,YASUR"
77
76
  end
78
77
  end
data/yasuri.gemspec CHANGED
@@ -14,12 +14,13 @@ Gem::Specification.new do |spec|
14
14
  spec.license = "MIT"
15
15
 
16
16
  spec.files = `git ls-files -z`.split("\x0")
17
- spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
18
+ spec.bindir = "exe"
18
19
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
20
  spec.require_paths = ["lib"]
20
21
 
21
- spec.add_development_dependency "bundler", "~> 1.7"
22
- spec.add_development_dependency "rake", "~> 10.0"
22
+ spec.add_development_dependency "bundler"
23
+ spec.add_development_dependency "rake"
23
24
  spec.add_development_dependency "rspec"
24
25
  spec.add_development_dependency "fuubar"
25
26
  spec.add_development_dependency "glint"
@@ -28,4 +29,5 @@ Gem::Specification.new do |spec|
28
29
  spec.add_development_dependency "codeclimate-test-reporter"
29
30
 
30
31
  spec.add_dependency "mechanize"
32
+ spec.add_dependency "thor"
31
33
  end
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yasuri
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.12
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAC
8
- autorequire:
9
- bindir: bin
8
+ autorequire:
9
+ bindir: exe
10
10
  cert_chain: []
11
- date: 2016-12-15 00:00:00.000000000 Z
11
+ date: 2021-03-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '1.7'
19
+ version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '1.7'
26
+ version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -136,16 +136,33 @@ dependencies:
136
136
  - - ">="
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: thor
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
139
153
  description: Yasuri is an easy web-scraping library for supporting "Mechanize".
140
154
  email:
141
155
  - tac@tac42.net
142
- executables: []
156
+ executables:
157
+ - yasuri
143
158
  extensions: []
144
159
  extra_rdoc_files: []
145
160
  files:
146
161
  - ".coveralls.yml"
162
+ - ".github/workflows/ruby.yml"
147
163
  - ".gitignore"
148
164
  - ".rspec"
165
+ - ".ruby-version"
149
166
  - ".travis.yml"
150
167
  - Gemfile
151
168
  - LICENSE
@@ -153,16 +170,26 @@ files:
153
170
  - Rakefile
154
171
  - USAGE.ja.md
155
172
  - USAGE.md
156
- - app.rb
173
+ - examples/example.rb
174
+ - examples/github.yml
175
+ - examples/sample.json
176
+ - examples/sample.yml
177
+ - exe/yasuri
157
178
  - lib/yasuri.rb
158
179
  - lib/yasuri/version.rb
159
180
  - lib/yasuri/yasuri.rb
181
+ - lib/yasuri/yasuri_cli.rb
160
182
  - lib/yasuri/yasuri_links_node.rb
183
+ - lib/yasuri/yasuri_map_node.rb
161
184
  - lib/yasuri/yasuri_node.rb
162
185
  - lib/yasuri/yasuri_node_generator.rb
163
186
  - lib/yasuri/yasuri_paginate_node.rb
164
187
  - lib/yasuri/yasuri_struct_node.rb
165
188
  - lib/yasuri/yasuri_text_node.rb
189
+ - spec/cli_resources/tree.json
190
+ - spec/cli_resources/tree.yml
191
+ - spec/cli_resources/tree_wrong.json
192
+ - spec/cli_resources/tree_wrong.yml
166
193
  - spec/htdocs/child01.html
167
194
  - spec/htdocs/child01_sub.html
168
195
  - spec/htdocs/child02.html
@@ -178,7 +205,9 @@ files:
178
205
  - spec/htdocs/struct/structual_text.html
179
206
  - spec/servers/httpserver.rb
180
207
  - spec/spec_helper.rb
208
+ - spec/yasuri_cli_spec.rb
181
209
  - spec/yasuri_links_node_spec.rb
210
+ - spec/yasuri_map_spec.rb
182
211
  - spec/yasuri_node_spec.rb
183
212
  - spec/yasuri_paginate_node_spec.rb
184
213
  - spec/yasuri_spec.rb
@@ -189,7 +218,7 @@ homepage: https://github.com/tac0x2a/yasuri
189
218
  licenses:
190
219
  - MIT
191
220
  metadata: {}
192
- post_install_message:
221
+ post_install_message:
193
222
  rdoc_options: []
194
223
  require_paths:
195
224
  - lib
@@ -204,12 +233,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
204
233
  - !ruby/object:Gem::Version
205
234
  version: '0'
206
235
  requirements: []
207
- rubyforge_project:
208
- rubygems_version: 2.5.2
209
- signing_key:
236
+ rubygems_version: 3.2.3
237
+ signing_key:
210
238
  specification_version: 4
211
239
  summary: Yasuri is easy scraping library.
212
240
  test_files:
241
+ - spec/cli_resources/tree.json
242
+ - spec/cli_resources/tree.yml
243
+ - spec/cli_resources/tree_wrong.json
244
+ - spec/cli_resources/tree_wrong.yml
213
245
  - spec/htdocs/child01.html
214
246
  - spec/htdocs/child01_sub.html
215
247
  - spec/htdocs/child02.html
@@ -225,7 +257,9 @@ test_files:
225
257
  - spec/htdocs/struct/structual_text.html
226
258
  - spec/servers/httpserver.rb
227
259
  - spec/spec_helper.rb
260
+ - spec/yasuri_cli_spec.rb
228
261
  - spec/yasuri_links_node_spec.rb
262
+ - spec/yasuri_map_spec.rb
229
263
  - spec/yasuri_node_spec.rb
230
264
  - spec/yasuri_paginate_node_spec.rb
231
265
  - spec/yasuri_spec.rb
data/app.rb DELETED
@@ -1,52 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # -*- coding: utf-8 -*-
3
-
4
- # Author:: TAC (tac@tac42.net)
5
-
6
- require 'pp'
7
- require 'time'
8
- require 'mechanize'
9
-
10
- require_relative 'lib/yasuri/yasuri'
11
-
12
- agent = Mechanize.new
13
-
14
- uri = "http://www.asahi.com/"
15
-
16
- # Node tree constructing by DSL
17
- root = Yasuri.links_top '//*[@id="MainInner"]/div[1]/ul/li/a' do
18
- text_title '//*[@id="MainInner"]/div[1]/div/h1'
19
- text_article '//*[@id="MainInner"]/div/div[@class="ArticleText"]'
20
- end
21
-
22
- # Node tree constructing by JSON
23
- src = <<-EOJSON
24
- { "node" : "links",
25
- "name" : "root",
26
- "path" : "//*[@id='MainInner']/div[1]/ul/li/a",
27
- "children" : [
28
- { "node" : "text",
29
- "name" : "title",
30
- "path" : "//*[@id='MainInner']/div[1]/div/h1"
31
- },
32
- { "node" : "text",
33
- "name" : "article",
34
- "path" : "//*[@id='MainInner']/div/div[@class='ArticleText']"
35
- }
36
- ]
37
- }
38
- EOJSON
39
- root = Yasuri.json2tree(src)
40
-
41
- # Access to parsed resources
42
- page = agent.get(uri)
43
- contents = root.inject(agent, page)
44
-
45
- contents.each do |h|
46
- t = h['title']
47
- a = h['article']
48
-
49
- puts t
50
- puts a
51
- puts "=" * 100
52
- end