yasuri 2.0.13 → 3.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/ruby.yml +35 -0
- data/.rubocop.yml +49 -0
- data/.rubocop_todo.yml +0 -0
- data/.ruby-version +1 -1
- data/README.md +82 -31
- data/Rakefile +1 -1
- data/USAGE.ja.md +366 -131
- data/USAGE.md +371 -136
- data/examples/example.rb +78 -0
- data/examples/github.yml +15 -0
- data/examples/sample.json +4 -0
- data/examples/sample.yml +11 -0
- data/exe/yasuri +5 -0
- data/lib/yasuri.rb +1 -0
- data/lib/yasuri/version.rb +1 -1
- data/lib/yasuri/yasuri.rb +96 -75
- data/lib/yasuri/yasuri_cli.rb +78 -0
- data/lib/yasuri/yasuri_links_node.rb +10 -6
- data/lib/yasuri/yasuri_map_node.rb +40 -0
- data/lib/yasuri/yasuri_node.rb +36 -4
- data/lib/yasuri/yasuri_node_generator.rb +17 -14
- data/lib/yasuri/yasuri_paginate_node.rb +26 -16
- data/lib/yasuri/yasuri_struct_node.rb +6 -4
- data/lib/yasuri/yasuri_text_node.rb +13 -8
- data/spec/cli_resources/tree.json +8 -0
- data/spec/cli_resources/tree.yml +5 -0
- data/spec/cli_resources/tree_wrong.json +9 -0
- data/spec/cli_resources/tree_wrong.yml +6 -0
- data/spec/servers/httpserver.rb +0 -2
- data/spec/spec_helper.rb +4 -11
- data/spec/yasuri_cli_spec.rb +114 -0
- data/spec/yasuri_links_node_spec.rb +92 -60
- data/spec/yasuri_map_spec.rb +71 -0
- data/spec/yasuri_paginate_node_spec.rb +99 -88
- data/spec/yasuri_spec.rb +196 -138
- data/spec/yasuri_struct_node_spec.rb +120 -100
- data/spec/yasuri_text_node_spec.rb +22 -32
- data/yasuri.gemspec +29 -22
- metadata +108 -19
- data/app.rb +0 -52
- data/spec/yasuri_node_spec.rb +0 -11
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yasuri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAC
|
8
|
-
autorequire:
|
9
|
-
bindir:
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -25,7 +25,7 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: codeclimate-test-reporter
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: coveralls
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -81,7 +81,7 @@ dependencies:
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: rake
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - ">="
|
@@ -95,7 +95,7 @@ dependencies:
|
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: rspec
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - ">="
|
@@ -109,7 +109,63 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: rubocop
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rubocop-performance
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: rubocop-rspec
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rubocop-rubycw
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: simplecov
|
113
169
|
requirement: !ruby/object:Gem::Requirement
|
114
170
|
requirements:
|
115
171
|
- - ">="
|
@@ -136,16 +192,34 @@ dependencies:
|
|
136
192
|
- - ">="
|
137
193
|
- !ruby/object:Gem::Version
|
138
194
|
version: '0'
|
139
|
-
|
195
|
+
- !ruby/object:Gem::Dependency
|
196
|
+
name: thor
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - ">="
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: '0'
|
202
|
+
type: :runtime
|
203
|
+
prerelease: false
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - ">="
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: '0'
|
209
|
+
description: Yasuri is an easy web-scraping library for supporting 'Mechanize'.
|
140
210
|
email:
|
141
211
|
- tac@tac42.net
|
142
|
-
executables:
|
212
|
+
executables:
|
213
|
+
- yasuri
|
143
214
|
extensions: []
|
144
215
|
extra_rdoc_files: []
|
145
216
|
files:
|
146
217
|
- ".coveralls.yml"
|
218
|
+
- ".github/workflows/ruby.yml"
|
147
219
|
- ".gitignore"
|
148
220
|
- ".rspec"
|
221
|
+
- ".rubocop.yml"
|
222
|
+
- ".rubocop_todo.yml"
|
149
223
|
- ".ruby-version"
|
150
224
|
- ".travis.yml"
|
151
225
|
- Gemfile
|
@@ -154,16 +228,26 @@ files:
|
|
154
228
|
- Rakefile
|
155
229
|
- USAGE.ja.md
|
156
230
|
- USAGE.md
|
157
|
-
-
|
231
|
+
- examples/example.rb
|
232
|
+
- examples/github.yml
|
233
|
+
- examples/sample.json
|
234
|
+
- examples/sample.yml
|
235
|
+
- exe/yasuri
|
158
236
|
- lib/yasuri.rb
|
159
237
|
- lib/yasuri/version.rb
|
160
238
|
- lib/yasuri/yasuri.rb
|
239
|
+
- lib/yasuri/yasuri_cli.rb
|
161
240
|
- lib/yasuri/yasuri_links_node.rb
|
241
|
+
- lib/yasuri/yasuri_map_node.rb
|
162
242
|
- lib/yasuri/yasuri_node.rb
|
163
243
|
- lib/yasuri/yasuri_node_generator.rb
|
164
244
|
- lib/yasuri/yasuri_paginate_node.rb
|
165
245
|
- lib/yasuri/yasuri_struct_node.rb
|
166
246
|
- lib/yasuri/yasuri_text_node.rb
|
247
|
+
- spec/cli_resources/tree.json
|
248
|
+
- spec/cli_resources/tree.yml
|
249
|
+
- spec/cli_resources/tree_wrong.json
|
250
|
+
- spec/cli_resources/tree_wrong.yml
|
167
251
|
- spec/htdocs/child01.html
|
168
252
|
- spec/htdocs/child01_sub.html
|
169
253
|
- spec/htdocs/child02.html
|
@@ -179,8 +263,9 @@ files:
|
|
179
263
|
- spec/htdocs/struct/structual_text.html
|
180
264
|
- spec/servers/httpserver.rb
|
181
265
|
- spec/spec_helper.rb
|
266
|
+
- spec/yasuri_cli_spec.rb
|
182
267
|
- spec/yasuri_links_node_spec.rb
|
183
|
-
- spec/
|
268
|
+
- spec/yasuri_map_spec.rb
|
184
269
|
- spec/yasuri_paginate_node_spec.rb
|
185
270
|
- spec/yasuri_spec.rb
|
186
271
|
- spec/yasuri_struct_node_spec.rb
|
@@ -190,7 +275,7 @@ homepage: https://github.com/tac0x2a/yasuri
|
|
190
275
|
licenses:
|
191
276
|
- MIT
|
192
277
|
metadata: {}
|
193
|
-
post_install_message:
|
278
|
+
post_install_message:
|
194
279
|
rdoc_options: []
|
195
280
|
require_paths:
|
196
281
|
- lib
|
@@ -198,19 +283,22 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
198
283
|
requirements:
|
199
284
|
- - ">="
|
200
285
|
- !ruby/object:Gem::Version
|
201
|
-
version:
|
286
|
+
version: 2.7.0
|
202
287
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
203
288
|
requirements:
|
204
289
|
- - ">="
|
205
290
|
- !ruby/object:Gem::Version
|
206
291
|
version: '0'
|
207
292
|
requirements: []
|
208
|
-
|
209
|
-
|
210
|
-
signing_key:
|
293
|
+
rubygems_version: 3.2.3
|
294
|
+
signing_key:
|
211
295
|
specification_version: 4
|
212
296
|
summary: Yasuri is easy scraping library.
|
213
297
|
test_files:
|
298
|
+
- spec/cli_resources/tree.json
|
299
|
+
- spec/cli_resources/tree.yml
|
300
|
+
- spec/cli_resources/tree_wrong.json
|
301
|
+
- spec/cli_resources/tree_wrong.yml
|
214
302
|
- spec/htdocs/child01.html
|
215
303
|
- spec/htdocs/child01_sub.html
|
216
304
|
- spec/htdocs/child02.html
|
@@ -226,8 +314,9 @@ test_files:
|
|
226
314
|
- spec/htdocs/struct/structual_text.html
|
227
315
|
- spec/servers/httpserver.rb
|
228
316
|
- spec/spec_helper.rb
|
317
|
+
- spec/yasuri_cli_spec.rb
|
229
318
|
- spec/yasuri_links_node_spec.rb
|
230
|
-
- spec/
|
319
|
+
- spec/yasuri_map_spec.rb
|
231
320
|
- spec/yasuri_paginate_node_spec.rb
|
232
321
|
- spec/yasuri_spec.rb
|
233
322
|
- spec/yasuri_struct_node_spec.rb
|
data/app.rb
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
# Author:: TAC (tac@tac42.net)
|
5
|
-
|
6
|
-
require 'pp'
|
7
|
-
require 'time'
|
8
|
-
require 'mechanize'
|
9
|
-
|
10
|
-
require_relative 'lib/yasuri/yasuri'
|
11
|
-
|
12
|
-
agent = Mechanize.new
|
13
|
-
|
14
|
-
uri = "http://www.asahi.com/"
|
15
|
-
|
16
|
-
# Node tree constructing by DSL
|
17
|
-
root = Yasuri.links_top '//*[@id="MainInner"]/div[1]/ul/li/a' do
|
18
|
-
text_title '//*[@id="MainInner"]/div[1]/div/h1'
|
19
|
-
text_article '//*[@id="MainInner"]/div/div[@class="ArticleText"]'
|
20
|
-
end
|
21
|
-
|
22
|
-
# Node tree constructing by JSON
|
23
|
-
src = <<-EOJSON
|
24
|
-
{ "node" : "links",
|
25
|
-
"name" : "root",
|
26
|
-
"path" : "//*[@id='MainInner']/div[1]/ul/li/a",
|
27
|
-
"children" : [
|
28
|
-
{ "node" : "text",
|
29
|
-
"name" : "title",
|
30
|
-
"path" : "//*[@id='MainInner']/div[1]/div/h1"
|
31
|
-
},
|
32
|
-
{ "node" : "text",
|
33
|
-
"name" : "article",
|
34
|
-
"path" : "//*[@id='MainInner']/div/div[@class='ArticleText']"
|
35
|
-
}
|
36
|
-
]
|
37
|
-
}
|
38
|
-
EOJSON
|
39
|
-
root = Yasuri.json2tree(src)
|
40
|
-
|
41
|
-
# Access to parsed resources
|
42
|
-
page = agent.get(uri)
|
43
|
-
contents = root.inject(agent, page)
|
44
|
-
|
45
|
-
contents.each do |h|
|
46
|
-
t = h['title']
|
47
|
-
a = h['article']
|
48
|
-
|
49
|
-
puts t
|
50
|
-
puts a
|
51
|
-
puts "=" * 100
|
52
|
-
end
|