yasuri 1.9.12 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/ruby.yml +35 -0
- data/.gitignore +1 -2
- data/.ruby-version +1 -0
- data/.travis.yml +1 -3
- data/README.md +40 -1
- data/USAGE.ja.md +94 -11
- data/USAGE.md +96 -12
- data/lib/yasuri/version.rb +1 -1
- data/lib/yasuri/yasuri.rb +44 -39
- data/lib/yasuri/yasuri_links_node.rb +6 -2
- data/lib/yasuri/yasuri_map_node.rb +54 -0
- data/lib/yasuri/yasuri_node.rb +45 -2
- data/lib/yasuri/yasuri_node_generator.rb +16 -11
- data/lib/yasuri/yasuri_paginate_node.rb +7 -3
- data/lib/yasuri/yasuri_text_node.rb +7 -3
- data/spec/spec_helper.rb +0 -5
- data/spec/yasuri_links_node_spec.rb +12 -4
- data/spec/yasuri_map_spec.rb +76 -0
- data/spec/yasuri_spec.rb +131 -2
- data/spec/yasuri_struct_node_spec.rb +1 -1
- data/yasuri.gemspec +2 -2
- metadata +19 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f3542a2cc0959a4534520f6104fc2922bdf0dbd368fcd4c149c3d251c2fc2198
|
4
|
+
data.tar.gz: 6fdb960db697e9a4ec1d87f2b83bf0e9914e3c9efe90764536bbee6d68774353
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9df576243bea289f4c285c46f1bd2137b7b69b79b24e0c657e4ac952114dd7bcf82a5f95cd2dae88c6eac4e3e468273b7dbd6ead9d05ffdc8d25861921702333
|
7
|
+
data.tar.gz: 13f2ae72b3e8fa6d3ef58932daa2acad49f5d4f57c80f34e5215394940fc2305bc016d949760efe9f43ae2b8c3796064a1b0bd9bccf236cfe3789c2c291dfd8b
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ master ]
|
13
|
+
pull_request:
|
14
|
+
branches: [ master ]
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
strategy:
|
21
|
+
matrix:
|
22
|
+
ruby-version: ['2.6', '2.7', '3.0']
|
23
|
+
|
24
|
+
steps:
|
25
|
+
- uses: actions/checkout@v2
|
26
|
+
- name: Set up Ruby
|
27
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
28
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
29
|
+
# uses: ruby/setup-ruby@v1
|
30
|
+
uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
|
31
|
+
with:
|
32
|
+
ruby-version: ${{ matrix.ruby-version }}
|
33
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
34
|
+
- name: Run tests
|
35
|
+
run: bundle exec rake
|
data/.gitignore
CHANGED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
3.0.0
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
# Yasuri
|
1
|
+
# Yasuri
|
2
|
+
[![Build Status](https://github.com/tac0x2a/yasuri/actions/workflows/ruby.yml/badge.svg)](https://github.com/tac0x2a/yasuri/actions/workflows/ruby.yml)
|
3
|
+
[![Coverage Status](https://coveralls.io/repos/tac0x2a/yasuri/badge.svg?branch=master)](https://coveralls.io/r/tac0x2a/yasuri?branch=master) [![Maintainability](https://api.codeclimate.com/v1/badges/c29480fea1305afe999f/maintainability)](https://codeclimate.com/github/tac0x2a/yasuri/maintainability)
|
2
4
|
|
3
5
|
Yasuri (鑢) is an easy web-scraping library for supporting "[Mechanize](https://github.com/sparklemotion/mechanize)".
|
4
6
|
|
@@ -32,6 +34,9 @@ or
|
|
32
34
|
```ruby
|
33
35
|
# for Ruby 1.9.3 or lower
|
34
36
|
gem 'yasuri', '~> 1.9'
|
37
|
+
|
38
|
+
# for Ruby 3.0.0 or lower
|
39
|
+
gem 'yasuri', '~> 3.0.1'
|
35
40
|
```
|
36
41
|
|
37
42
|
|
@@ -52,6 +57,19 @@ root = Yasuri.links_root '//*[@id="menu"]/ul/li/a' do
|
|
52
57
|
text_content '//*[@id="contents"]/p[1]'
|
53
58
|
end
|
54
59
|
|
60
|
+
|
61
|
+
# Constructing the node tree from YAML
|
62
|
+
src = <<-EOYAML
|
63
|
+
root:
|
64
|
+
node: links
|
65
|
+
path: "//*[@id='menu']/ul/li/a"
|
66
|
+
children:
|
67
|
+
- title: { node: text, path: "//*[@id='contents']/h2" }
|
68
|
+
- content: { node: text, path: "//*[@id='contents']/p[1]" }
|
69
|
+
EOYAML
|
70
|
+
root = Yasuri.yaml2tree(src)
|
71
|
+
|
72
|
+
|
55
73
|
# Constructing the node tree from JSON
|
56
74
|
src = <<-EOJSON
|
57
75
|
{ "node" : "links",
|
@@ -78,6 +96,27 @@ result = root.inject(agent, root_page)
|
|
78
96
|
# => [ {"title" => "PageTitle", "content" => "Page Contents" }, ... ]
|
79
97
|
```
|
80
98
|
|
99
|
+
## Dev
|
100
|
+
```sh
|
101
|
+
$ gem install bundler
|
102
|
+
$ bundle install
|
103
|
+
```
|
104
|
+
### Test
|
105
|
+
```sh
|
106
|
+
$ rake
|
107
|
+
# or
|
108
|
+
$ rspec spec/*spec.rb
|
109
|
+
```
|
110
|
+
|
111
|
+
### Release RubyGems
|
112
|
+
```sh
|
113
|
+
# Only first time
|
114
|
+
$ curl -u <user_name> https://rubygems.org/api/v1/api_key.yaml > ~/.gem/credentials
|
115
|
+
$ chmod 0600 ~/.gem/credentials
|
116
|
+
|
117
|
+
$ nano lib/yasuri/version.rb # edit gem version
|
118
|
+
$ rake release
|
119
|
+
```
|
81
120
|
|
82
121
|
## Contributing
|
83
122
|
|
data/USAGE.ja.md
CHANGED
@@ -67,7 +67,7 @@ page = agent.get(uri)
|
|
67
67
|
tree.inject(agent, page)
|
68
68
|
```
|
69
69
|
|
70
|
-
ツリーは、DSL
|
70
|
+
ツリーは、json,yaml,またはDSLで定義することができます.上の例ではDSLで定義しています.
|
71
71
|
以下は、jsonで上記と等価な解析ツリーを定義した例です.
|
72
72
|
|
73
73
|
```ruby
|
@@ -87,25 +87,54 @@ EOJSON
|
|
87
87
|
tree = Yasuri.json2tree(src)
|
88
88
|
```
|
89
89
|
|
90
|
+
```ruby
|
91
|
+
# yaml で構成する場合
|
92
|
+
src = <<-EOYAML
|
93
|
+
title:
|
94
|
+
node: links
|
95
|
+
path: "/html/body/a"
|
96
|
+
children:
|
97
|
+
- name:
|
98
|
+
node: text
|
99
|
+
path: "/html/body/p"
|
100
|
+
EOYAML
|
101
|
+
tree = Yasuri.yaml2tree(src)
|
102
|
+
```
|
90
103
|
|
91
104
|
### Node
|
92
105
|
ツリーは入れ子になった *Node* で構成されます.
|
93
106
|
Node は `Type`, `Name`, `Path`, `Children`, `Options` を持っています.
|
107
|
+
(ただし、`MapNode` のみ `Path` を持ちません)
|
94
108
|
|
95
109
|
Nodeは以下のフォーマットで定義されます.
|
96
110
|
|
97
111
|
```ruby
|
98
|
-
# トップレベル
|
99
112
|
Yasuri.<Type>_<Name> <Path> [,<Options>]
|
100
113
|
|
101
114
|
# 入れ子になっている場合
|
102
115
|
Yasuri.<Type>_<Name> <Path> [,<Options>] do
|
103
116
|
<Type>_<Name> <Path> [,<Options>] do
|
104
|
-
<
|
117
|
+
<Type>_<Name> <Path> [,<Options>]
|
118
|
+
...
|
105
119
|
end
|
106
120
|
end
|
107
121
|
```
|
108
122
|
|
123
|
+
例
|
124
|
+
|
125
|
+
```ruby
|
126
|
+
Yasuri.text_title '/html/head/title', truncate:/^[^,]+/
|
127
|
+
|
128
|
+
# 入れ子になっている場合
|
129
|
+
Yasuri.links_root '//*[@id="menu"]/ul/li/a' do
|
130
|
+
struct_table './tr' do
|
131
|
+
text_title './td[1]'
|
132
|
+
text_pub_date './td[2]'
|
133
|
+
end
|
134
|
+
end
|
135
|
+
```
|
136
|
+
|
137
|
+
|
109
138
|
#### Type
|
110
139
|
*Type* は Nodeの振る舞いを示します.Typeには以下のものがあります.
|
111
140
|
|
@@ -113,18 +142,19 @@ end
|
|
113
142
|
- *Struct*
|
114
143
|
- *Links*
|
115
144
|
- *Paginate*
|
145
|
+
- *Map*
|
116
146
|
|
117
|
-
|
147
|
+
#### Name
|
118
148
|
*Name* は 解析結果のHashにおけるキーになります.
|
119
149
|
|
120
|
-
|
150
|
+
#### Path
|
121
151
|
*Path* は xpath あるいは css セレクタによって、HTML上の特定のノードを指定します.
|
122
152
|
これは Mechanize の `search` で使用されます.
|
123
153
|
|
124
|
-
|
154
|
+
#### Children
|
125
155
|
入れ子になっているノードの子ノードです.TextNodeはツリーの葉に当たるため、子ノードを持ちません.
|
126
156
|
|
127
|
-
|
157
|
+
#### Options
|
128
158
|
パースのオプションです.オプションはTypeごとに異なります.
|
129
159
|
各ノードに対して、`opt`メソッドをコールすることで、利用可能なオプションを取得できます.
|
130
160
|
|
@@ -156,13 +186,15 @@ page = agent.get("http://yasuri.example.net")
|
|
156
186
|
|
157
187
|
p1 = Yasuri.text_title '/html/body/p[1]'
|
158
188
|
p1t = Yasuri.text_title '/html/body/p[1]', truncate:/^[^,]+/
|
159
|
-
p2u = Yasuri.text_title '/html/body/p[
|
189
|
+
p2u = Yasuri.text_title '/html/body/p[1]', proc: :upcase
|
160
190
|
|
161
|
-
p1.inject(agent, page) #=>
|
162
|
-
p1t.inject(agent, page) #=>
|
163
|
-
|
191
|
+
p1.inject(agent, page) #=> "Hello,World"
|
192
|
+
p1t.inject(agent, page) #=> "Hello"
|
193
|
+
p2u.inject(agent, page) #=> "HELLO,WORLD"
|
164
194
|
```
|
165
195
|
|
196
|
+
なお、同じページ内の複数の要素を一度にスクレイピングする場合は、`MapNode`を使用します。
|
197
|
+
|
166
198
|
### オプション
|
167
199
|
##### `truncate`
|
168
200
|
正規表現にマッチした文字列を取り出します.グループを指定した場合、最初にマッチしたグループだけを返します.
|
@@ -466,3 +498,54 @@ node.inject(agent, page)
|
|
466
498
|
"Page03",
|
467
499
|
"Patination03"]
|
468
500
|
```
|
501
|
+
|
502
|
+
## Map Node
|
503
|
+
*MapNode* はスクレイピングした結果をまとめるノードです.このノードはパースツリーにおいて常に節です.
|
504
|
+
|
505
|
+
### 例
|
506
|
+
|
507
|
+
```html
|
508
|
+
<!-- http://yasuri.example.net -->
|
509
|
+
<html>
|
510
|
+
<head><title>Yasuri Example</title></head>
|
511
|
+
<body>
|
512
|
+
<p>Hello,World</p>
|
513
|
+
<p>Hello,Yasuri</p>
|
514
|
+
</body>
|
515
|
+
</html>
|
516
|
+
```
|
517
|
+
|
518
|
+
```ruby
|
519
|
+
agent = Mechanize.new
|
520
|
+
page = agent.get("http://yasuri.example.net")
|
521
|
+
|
522
|
+
|
523
|
+
tree = Yasuri.map_root do
|
524
|
+
text_title '/html/head/title'
|
525
|
+
text_body_p '/html/body/p[1]'
|
526
|
+
end
|
527
|
+
|
528
|
+
tree.inject(agent, page) #=> { "title" => "Yasuri Example", "body_p" => "Hello,World" }
|
529
|
+
|
530
|
+
|
531
|
+
tree = Yasuri.map_root do
|
532
|
+
map_group1 { text_child01 '/html/body/a[1]' }
|
533
|
+
map_group2 do
|
534
|
+
text_child01 '/html/body/a[1]'
|
535
|
+
text_child03 '/html/body/a[3]'
|
536
|
+
end
|
537
|
+
end
|
538
|
+
|
539
|
+
tree.inject(agent, page) #=> {
|
540
|
+
# "group1" => {
|
541
|
+
# "child01" => "child01"
|
542
|
+
# },
|
543
|
+
# "group2" => {
|
544
|
+
# "child01" => "child01",
|
545
|
+
# "child03" => "child03"
|
546
|
+
# }
|
547
|
+
# }
|
548
|
+
```
|
549
|
+
|
550
|
+
### オプション
|
551
|
+
なし
|
data/USAGE.md
CHANGED
@@ -69,7 +69,7 @@ page = agent.get(uri)
|
|
69
69
|
tree.inject(agent, page)
|
70
70
|
```
|
71
71
|
|
72
|
-
Tree is definable by
|
72
|
+
A tree can be defined in 3(+1) ways: JSON, YAML, and the DSL (or plain Ruby code). The example above uses the DSL.
|
73
73
|
|
74
74
|
```ruby
|
75
75
|
# Construct by json.
|
@@ -88,21 +88,51 @@ EOJSON
|
|
88
88
|
tree = Yasuri.json2tree(src)
|
89
89
|
```
|
90
90
|
|
91
|
+
```ruby
|
92
|
+
# Construct by yaml.
|
93
|
+
src = <<-EOYAML
|
94
|
+
title:
|
95
|
+
node: links
|
96
|
+
path: "/html/body/a"
|
97
|
+
children:
|
98
|
+
- name:
|
99
|
+
node: text
|
100
|
+
path: "/html/body/p"
|
101
|
+
EOYAML
|
102
|
+
tree = Yasuri.yaml2tree(src)
|
103
|
+
```
|
104
|
+
|
105
|
+
|
91
106
|
### Node
|
92
107
|
Tree is constructed by nested Nodes.
|
93
108
|
A Node has a `Type`, `Name`, `Path`, `Children`, and `Options`.
|
109
|
+
(`MapNode` is the only node type that does not have a `Path`.)
|
94
110
|
|
95
111
|
Node is defined by this format.
|
96
112
|
|
97
113
|
|
98
114
|
```ruby
|
99
|
-
# Top Level
|
100
115
|
Yasuri.<Type>_<Name> <Path> [,<Options>]
|
101
116
|
|
102
|
-
# Nested
|
117
|
+
# Nested case
|
103
118
|
Yasuri.<Type>_<Name> <Path> [,<Options>] do
|
104
119
|
<Type>_<Name> <Path> [,<Options>] do
|
105
|
-
<
|
120
|
+
<Type>_<Name> <Path> [,<Options>]
|
121
|
+
...
|
122
|
+
end
|
123
|
+
end
|
124
|
+
```
|
125
|
+
|
126
|
+
Example
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
Yasuri.text_title '/html/head/title', truncate:/^[^,]+/
|
130
|
+
|
131
|
+
# Nested case
|
132
|
+
Yasuri.links_root '//*[@id="menu"]/ul/li/a' do
|
133
|
+
struct_table './tr' do
|
134
|
+
text_title './td[1]'
|
135
|
+
text_pub_date './td[2]'
|
106
136
|
end
|
107
137
|
end
|
108
138
|
```
|
@@ -114,17 +144,18 @@ Type meen behavior of Node.
|
|
114
144
|
- *Struct*
|
115
145
|
- *Links*
|
116
146
|
- *Paginate*
|
147
|
+
- *Map*
|
117
148
|
|
118
|
-
|
149
|
+
#### Name
|
119
150
|
Name is used as the key in the returned hash.
|
120
151
|
|
121
|
-
|
152
|
+
#### Path
|
122
153
|
Path determines the target node by XPath or CSS selector. It is passed to Mechanize's `search`.
|
123
154
|
|
124
|
-
|
155
|
+
#### Children
|
125
156
|
Child nodes. A TextNode always has an empty set of children, because a TextNode is a leaf.
|
126
157
|
|
127
|
-
|
158
|
+
#### Options
|
128
159
|
Parse options. They differ for each type. You can get the options and their values with the `opt` method.
|
129
160
|
|
130
161
|
```ruby
|
@@ -155,13 +186,15 @@ page = agent.get("http://yasuri.example.net")
|
|
155
186
|
|
156
187
|
p1 = Yasuri.text_title '/html/body/p[1]'
|
157
188
|
p1t = Yasuri.text_title '/html/body/p[1]', truncate:/^[^,]+/
|
158
|
-
p2u = Yasuri.text_title '/html/body/p[
|
189
|
+
p2u = Yasuri.text_title '/html/body/p[1]', proc: :upcase
|
159
190
|
|
160
|
-
p1.inject(agent, page) #=>
|
161
|
-
p1t.inject(agent, page) #=>
|
162
|
-
|
191
|
+
p1.inject(agent, page) #=> "Hello,World"
|
192
|
+
p1t.inject(agent, page) #=> "Hello"
|
193
|
+
p2u.inject(agent, page) #=> "HELLO,WORLD"
|
163
194
|
```
|
164
195
|
|
196
|
+
Note that if you want to scrape multiple elements in the same page at once, use `MapNode`. See the `MapNode` example for details.
|
197
|
+
|
165
198
|
### Options
|
166
199
|
##### `truncate`
|
167
200
|
Matches the text against the regexp and truncates it to the match. When you use a group, only the first matched group is returned.
|
@@ -464,3 +497,54 @@ node.inject(agent, page)
|
|
464
497
|
"Page03",
|
465
498
|
"Patination03"]
|
466
499
|
```
|
500
|
+
|
501
|
+
## Map Node
|
502
|
+
*MapNode* is a node that summarizes the results of scraping. This node is always a branch node in the parse tree.
|
503
|
+
|
504
|
+
### Example
|
505
|
+
|
506
|
+
```html
|
507
|
+
<!-- http://yasuri.example.net -->
|
508
|
+
<html>
|
509
|
+
<head><title>Yasuri Example</title></head>
|
510
|
+
<body>
|
511
|
+
<p>Hello,World</p>
|
512
|
+
<p>Hello,Yasuri</p>
|
513
|
+
</body>
|
514
|
+
</html>
|
515
|
+
```
|
516
|
+
|
517
|
+
```ruby
|
518
|
+
agent = Mechanize.new
|
519
|
+
page = agent.get("http://yasuri.example.net")
|
520
|
+
|
521
|
+
|
522
|
+
tree = Yasuri.map_root do
|
523
|
+
text_title '/html/head/title'
|
524
|
+
text_body_p '/html/body/p[1]'
|
525
|
+
end
|
526
|
+
|
527
|
+
tree.inject(agent, page) #=> { "title" => "Yasuri Example", "body_p" => "Hello,World" }
|
528
|
+
|
529
|
+
|
530
|
+
tree = Yasuri.map_root do
|
531
|
+
map_group1 { text_child01 '/html/body/a[1]' }
|
532
|
+
map_group2 do
|
533
|
+
text_child01 '/html/body/a[1]'
|
534
|
+
text_child03 '/html/body/a[3]'
|
535
|
+
end
|
536
|
+
end
|
537
|
+
|
538
|
+
tree.inject(agent, page) #=> {
|
539
|
+
# "group1" => {
|
540
|
+
# "child01" => "child01"
|
541
|
+
# },
|
542
|
+
# "group2" => {
|
543
|
+
# "child01" => "child01",
|
544
|
+
# "child03" => "child03"
|
545
|
+
# }
|
546
|
+
# }
|
547
|
+
```
|
548
|
+
|
549
|
+
### Options
|
550
|
+
None.
|
data/lib/yasuri/version.rb
CHANGED
data/lib/yasuri/yasuri.rb
CHANGED
@@ -4,12 +4,14 @@
|
|
4
4
|
|
5
5
|
require 'mechanize'
|
6
6
|
require 'json'
|
7
|
+
require 'yaml'
|
7
8
|
|
8
9
|
require_relative 'yasuri_node'
|
9
10
|
require_relative 'yasuri_text_node'
|
10
11
|
require_relative 'yasuri_struct_node'
|
11
12
|
require_relative 'yasuri_paginate_node'
|
12
13
|
require_relative 'yasuri_links_node'
|
14
|
+
require_relative 'yasuri_map_node'
|
13
15
|
require_relative 'yasuri_node_generator'
|
14
16
|
|
15
17
|
module Yasuri
|
@@ -23,9 +25,39 @@ module Yasuri
|
|
23
25
|
Yasuri.node2hash(node).to_json
|
24
26
|
end
|
25
27
|
|
26
|
-
def self.
|
27
|
-
|
28
|
-
|
28
|
+
def self.yaml2tree(yaml_string)
|
29
|
+
raise RuntimeError if yaml_string.nil? or yaml_string.empty?
|
30
|
+
|
31
|
+
yaml = YAML.load(yaml_string)
|
32
|
+
raise RuntimeError if yaml.keys.size < 1
|
33
|
+
|
34
|
+
root_key, root = yaml.keys.first, yaml.values.first
|
35
|
+
hash = Yasuri.yaml2tree_sub(root_key, root)
|
36
|
+
|
37
|
+
Yasuri.hash2node(hash)
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
def self.yaml2tree_sub(name, body)
|
42
|
+
return nil if name.nil? or body.nil?
|
43
|
+
|
44
|
+
new_body = Hash[:name, name]
|
45
|
+
body.each{|k,v| new_body[k.to_sym] = v}
|
46
|
+
body = new_body
|
47
|
+
|
48
|
+
return body if body[:children].nil?
|
49
|
+
|
50
|
+
body[:children] = body[:children].map do |c|
|
51
|
+
k, b = c.keys.first, c.values.first
|
52
|
+
Yasuri.yaml2tree_sub(k, b)
|
53
|
+
end
|
54
|
+
|
55
|
+
body
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.method_missing(method_name, pattern=nil, **opt, &block)
|
59
|
+
generated = Yasuri::NodeGenerator.gen(method_name, pattern, **opt, &block)
|
60
|
+
generated || super(method_name, **opt)
|
29
61
|
end
|
30
62
|
|
31
63
|
private
|
@@ -33,53 +65,26 @@ module Yasuri
|
|
33
65
|
text: Yasuri::TextNode,
|
34
66
|
struct: Yasuri::StructNode,
|
35
67
|
links: Yasuri::LinksNode,
|
36
|
-
pages: Yasuri::PaginateNode
|
68
|
+
pages: Yasuri::PaginateNode,
|
69
|
+
map: Yasuri::MapNode
|
37
70
|
}
|
38
71
|
Node2Text = Text2Node.invert
|
39
72
|
|
40
|
-
ReservedKeys =
|
73
|
+
ReservedKeys = %i|node name path children|
|
41
74
|
def self.hash2node(node_h)
|
42
|
-
node
|
43
|
-
node_h[key]
|
44
|
-
end
|
45
|
-
children ||= []
|
46
|
-
|
47
|
-
fail "Not found 'node' value in json" if node.nil?
|
48
|
-
fail "Not found 'name' value in json" if name.nil?
|
49
|
-
fail "Not found 'path' value in json" if path.nil?
|
50
|
-
|
51
|
-
childnodes = children.map{|c| Yasuri.hash2node(c) }
|
52
|
-
ReservedKeys.each{|key| node_h.delete(key)}
|
53
|
-
opt = node_h
|
75
|
+
node = node_h[:node]
|
54
76
|
|
77
|
+
fail "Not found 'node' value in map" if node.nil?
|
55
78
|
klass = Text2Node[node.to_sym]
|
56
|
-
|
57
|
-
klass.new(path, name, childnodes, opt)
|
79
|
+
klass::hash2node(node_h)
|
58
80
|
end
|
59
81
|
|
60
82
|
def self.node2hash(node)
|
61
|
-
|
62
|
-
return json if node.nil?
|
63
|
-
|
64
|
-
klass = node.class
|
65
|
-
klass_str = Node2Text[klass]
|
66
|
-
|
67
|
-
json["node"] = klass_str
|
68
|
-
json["name"] = node.name
|
69
|
-
json["path"] = node.xpath
|
70
|
-
|
71
|
-
children = node.children.map{|c| Yasuri.node2hash(c)}
|
72
|
-
json["children"] = children if not children.empty?
|
73
|
-
|
74
|
-
node.opts.each do |key,value|
|
75
|
-
json[key] = value if not value.nil?
|
76
|
-
end
|
77
|
-
|
78
|
-
json
|
83
|
+
node.to_h
|
79
84
|
end
|
80
85
|
|
81
|
-
def self.NodeName(name,
|
82
|
-
symbolize_names =
|
86
|
+
def self.NodeName(name, opt)
|
87
|
+
symbolize_names = opt[:symbolize_names]
|
83
88
|
symbolize_names ? name.to_sym : name
|
84
89
|
end
|
85
90
|
|
@@ -0,0 +1,54 @@
|
|
1
|
+
|
2
|
+
module Yasuri
|
3
|
+
class MapNode
|
4
|
+
attr_reader :name, :children
|
5
|
+
|
6
|
+
def initialize(name, children, opt: {})
|
7
|
+
@name = name
|
8
|
+
@children = children
|
9
|
+
@opt = opt
|
10
|
+
end
|
11
|
+
|
12
|
+
def inject(agent, page, opt = {}, element = page)
|
13
|
+
child_results_kv = @children.map do |node|
|
14
|
+
[node.name, node.inject(agent, page, opt)]
|
15
|
+
end
|
16
|
+
Hash[child_results_kv]
|
17
|
+
end
|
18
|
+
|
19
|
+
def opts
|
20
|
+
{}
|
21
|
+
end
|
22
|
+
|
23
|
+
def to_h
|
24
|
+
h = {}
|
25
|
+
h["node"] = "map"
|
26
|
+
h["name"] = self.name
|
27
|
+
h["children"] = self.children.map{|c| c.to_h} if not children.empty?
|
28
|
+
|
29
|
+
self.opts.each do |key,value|
|
30
|
+
h[key] = value if not value.nil?
|
31
|
+
end
|
32
|
+
|
33
|
+
h
|
34
|
+
end
|
35
|
+
|
36
|
+
def self.hash2node(node_h)
|
37
|
+
reservedKeys = %i|node name children|
|
38
|
+
|
39
|
+
node, name, children = reservedKeys.map do |key|
|
40
|
+
node_h[key]
|
41
|
+
end
|
42
|
+
|
43
|
+
fail "Not found 'name' value in map" if name.nil?
|
44
|
+
fail "Not found 'children' value in map" if children.nil?
|
45
|
+
children ||= []
|
46
|
+
|
47
|
+
childnodes = children.map{|c| Yasuri.hash2node(c) }
|
48
|
+
reservedKeys.each{|key| node_h.delete(key)}
|
49
|
+
opt = node_h
|
50
|
+
|
51
|
+
self.new(name, childnodes, **opt)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
data/lib/yasuri/yasuri_node.rb
CHANGED
@@ -7,15 +7,58 @@ module Yasuri
|
|
7
7
|
module Node
|
8
8
|
attr_reader :url, :xpath, :name, :children
|
9
9
|
|
10
|
-
def initialize(xpath, name, children = [], opt
|
10
|
+
def initialize(xpath, name, children = [], opt: {})
|
11
11
|
@xpath, @name, @children = xpath, name, children
|
12
12
|
end
|
13
13
|
|
14
14
|
def inject(agent, page, opt = {}, element = page)
|
15
|
-
fail "#{Kernel.__method__} is not implemented."
|
15
|
+
fail "#{Kernel.__method__} is not implemented in included class."
|
16
16
|
end
|
17
|
+
|
17
18
|
def opts
|
18
19
|
{}
|
19
20
|
end
|
21
|
+
|
22
|
+
def to_h
|
23
|
+
h = {}
|
24
|
+
h["node"] = self.node_type_str
|
25
|
+
h["name"] = self.name
|
26
|
+
h["path"] = self.xpath
|
27
|
+
h["children"] = self.children.map{|c| c.to_h} if not children.empty?
|
28
|
+
|
29
|
+
self.opts.each do |key,value|
|
30
|
+
h[key] = value if not value.nil?
|
31
|
+
end
|
32
|
+
|
33
|
+
h
|
34
|
+
end
|
35
|
+
|
36
|
+
module ClassMethods
|
37
|
+
def hash2node(node_h)
|
38
|
+
reservedKeys = %i|node name path children|
|
39
|
+
|
40
|
+
node, name, path, children = ReservedKeys.map do |key|
|
41
|
+
node_h[key]
|
42
|
+
end
|
43
|
+
|
44
|
+
fail "Not found 'name' value in map" if name.nil?
|
45
|
+
fail "Not found 'path' value in map" if path.nil?
|
46
|
+
children ||= []
|
47
|
+
|
48
|
+
childnodes = children.map{|c| Yasuri.hash2node(c) }
|
49
|
+
reservedKeys.each{|key| node_h.delete(key)}
|
50
|
+
opt = node_h
|
51
|
+
|
52
|
+
self.new(path, name, childnodes, **opt)
|
53
|
+
end
|
54
|
+
|
55
|
+
def node_type_str
|
56
|
+
fail "#{Kernel.__method__} is not implemented in included class."
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def self.included(base)
|
61
|
+
base.extend(ClassMethods)
|
62
|
+
end
|
20
63
|
end
|
21
64
|
end
|
@@ -6,6 +6,7 @@ require_relative 'yasuri_text_node'
|
|
6
6
|
require_relative 'yasuri_struct_node'
|
7
7
|
require_relative 'yasuri_links_node'
|
8
8
|
require_relative 'yasuri_paginate_node'
|
9
|
+
require_relative 'yasuri_map_node'
|
9
10
|
|
10
11
|
module Yasuri
|
11
12
|
class NodeGenerator
|
@@ -15,29 +16,33 @@ module Yasuri
|
|
15
16
|
@nodes
|
16
17
|
end
|
17
18
|
|
18
|
-
def method_missing(name,
|
19
|
-
node = NodeGenerator.gen(name,
|
19
|
+
def method_missing(name, pattern=nil, **args, &block)
|
20
|
+
node = NodeGenerator.gen(name, pattern, **args, &block)
|
20
21
|
raise "Undefined Node Name '#{name}'" if node == nil
|
21
22
|
@nodes << node
|
22
23
|
end
|
23
24
|
|
24
|
-
def self.gen(
|
25
|
-
xpath, opt = *args
|
26
|
-
opt = [opt].flatten.compact
|
25
|
+
def self.gen(method_name, xpath, **opt, &block)
|
27
26
|
children = Yasuri::NodeGenerator.new.gen_recursive(&block) if block_given?
|
28
27
|
|
29
|
-
case
|
28
|
+
case method_name
|
30
29
|
when /^text_(.+)$/
|
31
|
-
|
30
|
+
# Todo raise error xpath is not valid
|
31
|
+
Yasuri::TextNode.new(xpath, $1, children || [], **opt)
|
32
32
|
when /^struct_(.+)$/
|
33
|
-
|
33
|
+
# Todo raise error xpath is not valid
|
34
|
+
Yasuri::StructNode.new(xpath, $1, children || [], **opt)
|
34
35
|
when /^links_(.+)$/
|
35
|
-
|
36
|
+
# Todo raise error xpath is not valid
|
37
|
+
Yasuri::LinksNode.new(xpath, $1, children || [], **opt)
|
36
38
|
when /^pages_(.+)$/
|
37
|
-
|
39
|
+
# Todo raise error xpath is not valid
|
40
|
+
Yasuri::PaginateNode.new(xpath, $1, children || [], **opt)
|
41
|
+
when /^map_(.+)$/
|
42
|
+
Yasuri::MapNode.new($1, children, **opt)
|
38
43
|
else
|
39
44
|
nil
|
40
45
|
end
|
41
|
-
end # of self.gen(
|
46
|
+
end # of self.gen(method_name, xpath, **opt, &block)
|
42
47
|
end # of class NodeGenerator
|
43
48
|
end
|
@@ -7,10 +7,10 @@ module Yasuri
|
|
7
7
|
class PaginateNode
|
8
8
|
include Node
|
9
9
|
|
10
|
-
def initialize(xpath, name, children = [],
|
10
|
+
def initialize(xpath, name, children = [], limit: nil, flatten: false)
|
11
11
|
super(xpath, name, children)
|
12
|
-
@
|
13
|
-
@
|
12
|
+
@flatten = flatten
|
13
|
+
@limit = limit
|
14
14
|
end
|
15
15
|
|
16
16
|
def inject(agent, page, opt = {}, element = page)
|
@@ -44,5 +44,9 @@ module Yasuri
|
|
44
44
|
def opts
|
45
45
|
{limit:@limit, flatten:@flatten}
|
46
46
|
end
|
47
|
+
|
48
|
+
def node_type_str
|
49
|
+
"pages"
|
50
|
+
end
|
47
51
|
end
|
48
52
|
end
|
@@ -7,11 +7,11 @@ module Yasuri
|
|
7
7
|
class TextNode
|
8
8
|
include Node
|
9
9
|
|
10
|
-
def initialize(xpath, name, children = [],
|
10
|
+
def initialize(xpath, name, children = [], **opt)
|
11
11
|
super(xpath, name, children)
|
12
12
|
|
13
|
-
truncate =
|
14
|
-
proc
|
13
|
+
truncate = opt[:truncate]
|
14
|
+
proc = opt[:proc]
|
15
15
|
|
16
16
|
truncate = Regexp.new(truncate) if not truncate.nil? # regexp or nil
|
17
17
|
@truncate = truncate
|
@@ -34,6 +34,10 @@ module Yasuri
|
|
34
34
|
text
|
35
35
|
end
|
36
36
|
|
37
|
+
def node_type_str
|
38
|
+
"text"
|
39
|
+
end
|
40
|
+
|
37
41
|
def opts
|
38
42
|
{truncate:@truncate, proc:@proc}
|
39
43
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -12,11 +12,6 @@ shared_context 'httpserver' do
|
|
12
12
|
}
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
# ENV['CODECLIMATE_REPO_TOKEN'] = "0dc78d33107a7f11f257c0218ac1a37e0073005bb9734f2fd61d0f7e803fc151"
|
17
|
-
# require "codeclimate-test-reporter"
|
18
|
-
# CodeClimate::TestReporter.start
|
19
|
-
|
20
15
|
require 'simplecov'
|
21
16
|
require 'coveralls'
|
22
17
|
Coveralls.wear!
|
@@ -59,10 +59,18 @@ describe 'Yasuri' do
|
|
59
59
|
]
|
60
60
|
expect(actual).to match expected
|
61
61
|
end
|
62
|
-
it 'can be defined by DSL, return
|
63
|
-
|
64
|
-
|
65
|
-
|
62
|
+
it 'can be defined by DSL, return no contains if no child node' do
|
63
|
+
root_node = Yasuri.links_title '/html/body/a'
|
64
|
+
actual = root_node.inject(@agent, @index_page)
|
65
|
+
expected = [{}, {}, {}] # Empty if no child node under links node.
|
66
|
+
expect(actual).to match expected
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'can be defined return no contains if no child node' do
|
70
|
+
root_node = Yasuri::LinksNode.new('/html/body/a', "title")
|
71
|
+
actual = root_node.inject(@agent, @index_page)
|
72
|
+
expected = [{}, {}, {}] # Empty if no child node under links node.
|
73
|
+
expect(actual).to match expected
|
66
74
|
end
|
67
75
|
it 'can be defined by DSL, return nested contents under link' do
|
68
76
|
generated = Yasuri.links_title '/html/body/a' do
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require_relative 'spec_helper'
|
2
|
+
|
3
|
+
describe 'Yasuri' do
|
4
|
+
include_context 'httpserver'
|
5
|
+
|
6
|
+
before do
|
7
|
+
@agent = Mechanize.new
|
8
|
+
@index_page = @agent.get(uri)
|
9
|
+
end
|
10
|
+
|
11
|
+
describe '::MapNode' do
|
12
|
+
it "multi scrape in singe page" do
|
13
|
+
map = Yasuri.map_sample do
|
14
|
+
text_title '/html/head/title'
|
15
|
+
text_body_p '/html/body/p[1]'
|
16
|
+
end
|
17
|
+
actual = map.inject(@agent, @index_page)
|
18
|
+
|
19
|
+
expected = {
|
20
|
+
"title" => "Yasuri Test",
|
21
|
+
"body_p" => "Hello,Yasuri"
|
22
|
+
}
|
23
|
+
expect(actual).to include expected
|
24
|
+
end
|
25
|
+
|
26
|
+
it "nested multi scrape in singe page" do
|
27
|
+
map = Yasuri.map_sample do
|
28
|
+
map_group1 { text_child01 '/html/body/a[1]' }
|
29
|
+
map_group2 do
|
30
|
+
text_child01 '/html/body/a[1]'
|
31
|
+
text_child03 '/html/body/a[3]'
|
32
|
+
end
|
33
|
+
end
|
34
|
+
actual = map.inject(@agent, @index_page)
|
35
|
+
|
36
|
+
expected = {
|
37
|
+
"group1" => {
|
38
|
+
"child01" => "child01"
|
39
|
+
},
|
40
|
+
"group2" => {
|
41
|
+
"child01" => "child01",
|
42
|
+
"child03" => "child03"
|
43
|
+
}
|
44
|
+
}
|
45
|
+
expect(actual).to include expected
|
46
|
+
end
|
47
|
+
|
48
|
+
it "scrape with links node" do
|
49
|
+
map = Yasuri.map_sample do
|
50
|
+
map_group1 do
|
51
|
+
links_a '/html/body/a' do
|
52
|
+
text_content '/html/body/p'
|
53
|
+
end
|
54
|
+
text_child01 '/html/body/a[1]'
|
55
|
+
end
|
56
|
+
map_group2 do
|
57
|
+
text_child03 '/html/body/a[3]'
|
58
|
+
end
|
59
|
+
end
|
60
|
+
actual = map.inject(@agent, @index_page)
|
61
|
+
|
62
|
+
expected = {
|
63
|
+
"group1" => {
|
64
|
+
"a" => [
|
65
|
+
{"content" => "Child 01 page."},
|
66
|
+
{"content" => "Child 02 page."},
|
67
|
+
{"content" => "Child 03 page."},
|
68
|
+
],
|
69
|
+
"child01" => "child01"
|
70
|
+
},
|
71
|
+
"group2" => { "child03" => "child03" }
|
72
|
+
}
|
73
|
+
expect(actual).to include expected
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
data/spec/yasuri_spec.rb
CHANGED
@@ -13,6 +13,89 @@ describe 'Yasuri' do
|
|
13
13
|
@index_page = @agent.get(@uri)
|
14
14
|
end
|
15
15
|
|
16
|
+
############
|
17
|
+
# yam2tree #
|
18
|
+
############
|
19
|
+
describe '.yaml2tree' do
|
20
|
+
it "fail if empty yaml" do
|
21
|
+
expect { Yasuri.yaml2tree(nil) }.to raise_error(RuntimeError)
|
22
|
+
end
|
23
|
+
|
24
|
+
it "return text node" do
|
25
|
+
src = <<-EOB
|
26
|
+
content:
|
27
|
+
node: text
|
28
|
+
path: "/html/body/p[1]"
|
29
|
+
EOB
|
30
|
+
generated = Yasuri.yaml2tree(src)
|
31
|
+
original = Yasuri::TextNode.new('/html/body/p[1]', "content")
|
32
|
+
|
33
|
+
compare_generated_vs_original(generated, original, @index_page)
|
34
|
+
end
|
35
|
+
|
36
|
+
it "return text node as symbol" do
|
37
|
+
src = <<-EOB
|
38
|
+
:content:
|
39
|
+
:node: text
|
40
|
+
:path: "/html/body/p[1]"
|
41
|
+
EOB
|
42
|
+
generated = Yasuri.yaml2tree(src)
|
43
|
+
original = Yasuri::TextNode.new('/html/body/p[1]', "content")
|
44
|
+
|
45
|
+
compare_generated_vs_original(generated, original, @index_page)
|
46
|
+
end
|
47
|
+
|
48
|
+
it "return LinksNode/TextNode" do
|
49
|
+
|
50
|
+
src = <<-EOB
|
51
|
+
root:
|
52
|
+
node: links
|
53
|
+
path: "/html/body/a"
|
54
|
+
children:
|
55
|
+
- content:
|
56
|
+
node: text
|
57
|
+
path: "/html/body/p"
|
58
|
+
EOB
|
59
|
+
generated = Yasuri.yaml2tree(src)
|
60
|
+
original = Yasuri::LinksNode.new('/html/body/a', "root", [
|
61
|
+
Yasuri::TextNode.new('/html/body/p', "content"),
|
62
|
+
])
|
63
|
+
|
64
|
+
compare_generated_vs_original(generated, original, @index_page)
|
65
|
+
end
|
66
|
+
|
67
|
+
it "return StructNode/StructNode/[TextNode,TextNode]" do
|
68
|
+
src = <<-EOB
|
69
|
+
tables:
|
70
|
+
node: struct
|
71
|
+
path: "/html/body/table"
|
72
|
+
children:
|
73
|
+
- table:
|
74
|
+
node: struct
|
75
|
+
path: "./tr"
|
76
|
+
children:
|
77
|
+
- title:
|
78
|
+
node: text
|
79
|
+
path: "./td[1]"
|
80
|
+
- pub_date:
|
81
|
+
node: text
|
82
|
+
path: "./td[2]"
|
83
|
+
EOB
|
84
|
+
|
85
|
+
generated = Yasuri.yaml2tree(src)
|
86
|
+
original = Yasuri::StructNode.new('/html/body/table', "tables", [
|
87
|
+
Yasuri::StructNode.new('./tr', "table", [
|
88
|
+
Yasuri::TextNode.new('./td[1]', "title"),
|
89
|
+
Yasuri::TextNode.new('./td[2]', "pub_date"),
|
90
|
+
])
|
91
|
+
])
|
92
|
+
page = @agent.get(@uri + "/struct/structual_text.html")
|
93
|
+
compare_generated_vs_original(generated, original, page)
|
94
|
+
end
|
95
|
+
|
96
|
+
end # end of describe '.yaml2tree'
|
97
|
+
|
98
|
+
|
16
99
|
#############
|
17
100
|
# json2tree #
|
18
101
|
#############
|
@@ -39,10 +122,31 @@ describe 'Yasuri' do
|
|
39
122
|
"truncate" : "^[^,]+"
|
40
123
|
}|
|
41
124
|
generated = Yasuri.json2tree(src)
|
42
|
-
original = Yasuri::TextNode.new('/html/body/p[1]', "content",
|
125
|
+
original = Yasuri::TextNode.new('/html/body/p[1]', "content", truncate:/^[^,]+/)
|
43
126
|
compare_generated_vs_original(generated, original, @index_page)
|
44
127
|
end
|
45
128
|
|
129
|
+
it "return MapNode with TextNodes" do
|
130
|
+
src = %q| { "node" : "map",
|
131
|
+
"name" : "parent",
|
132
|
+
"children" : [
|
133
|
+
{ "node" : "text",
|
134
|
+
"name" : "content01",
|
135
|
+
"path" : "/html/body/p[1]"
|
136
|
+
},
|
137
|
+
{ "node" : "text",
|
138
|
+
"name" : "content02",
|
139
|
+
"path" : "/html/body/p[2]"
|
140
|
+
}
|
141
|
+
]
|
142
|
+
}|
|
143
|
+
generated = Yasuri.json2tree(src)
|
144
|
+
original = Yasuri::MapNode.new('parent', [
|
145
|
+
Yasuri::TextNode.new('/html/body/p[1]', "content01"),
|
146
|
+
Yasuri::TextNode.new('/html/body/p[2]', "content02"),
|
147
|
+
])
|
148
|
+
compare_generated_vs_original(generated, original, @index_page)
|
149
|
+
end
|
46
150
|
|
47
151
|
it "return LinksNode/TextNode" do
|
48
152
|
src = %q| { "node" : "links",
|
@@ -153,7 +257,7 @@ describe 'Yasuri' do
|
|
153
257
|
end
|
154
258
|
|
155
259
|
it "return text node with truncate_regexp" do
|
156
|
-
node = Yasuri::TextNode.new("/html/head/title", "title",
|
260
|
+
node = Yasuri::TextNode.new("/html/head/title", "title", truncate:/^[^,]+/)
|
157
261
|
json = Yasuri.tree2json(node)
|
158
262
|
expected_str = %q| { "node": "text",
|
159
263
|
"name": "title",
|
@@ -165,6 +269,31 @@ describe 'Yasuri' do
|
|
165
269
|
expect(actual).to match expected
|
166
270
|
end
|
167
271
|
|
272
|
+
it "return map node with text nodes" do
|
273
|
+
tree = Yasuri::MapNode.new('parent', [
|
274
|
+
Yasuri::TextNode.new('/html/body/p[1]', "content01"),
|
275
|
+
Yasuri::TextNode.new('/html/body/p[2]', "content02"),
|
276
|
+
])
|
277
|
+
actual_json = Yasuri.tree2json(tree)
|
278
|
+
|
279
|
+
expected_json = %q| { "node" : "map",
|
280
|
+
"name" : "parent",
|
281
|
+
"children" : [
|
282
|
+
{ "node" : "text",
|
283
|
+
"name" : "content01",
|
284
|
+
"path" : "/html/body/p[1]"
|
285
|
+
},
|
286
|
+
{ "node" : "text",
|
287
|
+
"name" : "content02",
|
288
|
+
"path" : "/html/body/p[2]"
|
289
|
+
}
|
290
|
+
]
|
291
|
+
}|
|
292
|
+
expected = Yasuri.tree2json(Yasuri.json2tree(expected_json))
|
293
|
+
actual = Yasuri.tree2json(Yasuri.json2tree(actual_json))
|
294
|
+
expect(actual).to match expected
|
295
|
+
end
|
296
|
+
|
168
297
|
it "return LinksNode/TextNode" do
|
169
298
|
tree = Yasuri::LinksNode.new('/html/body/a', "root", [
|
170
299
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
@@ -126,7 +126,7 @@ describe 'Yasuri' do
|
|
126
126
|
Yasuri::TextNode.new('./td[1]', "title"),
|
127
127
|
Yasuri::TextNode.new('./td[2]', "pub_date"),
|
128
128
|
])
|
129
|
-
expected = @table_1996.map{|h|
|
129
|
+
expected = @table_1996.map{|h| h.map{|k,v| [k.to_sym, v] }.to_h }
|
130
130
|
actual = node.inject(@agent, @page, symbolize_names:true)
|
131
131
|
expect(actual).to match expected
|
132
132
|
end
|
data/yasuri.gemspec
CHANGED
@@ -18,8 +18,8 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_development_dependency "bundler"
|
22
|
-
spec.add_development_dependency "rake"
|
21
|
+
spec.add_development_dependency "bundler"
|
22
|
+
spec.add_development_dependency "rake"
|
23
23
|
spec.add_development_dependency "rspec"
|
24
24
|
spec.add_development_dependency "fuubar"
|
25
25
|
spec.add_development_dependency "glint"
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yasuri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAC
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -144,8 +144,10 @@ extensions: []
|
|
144
144
|
extra_rdoc_files: []
|
145
145
|
files:
|
146
146
|
- ".coveralls.yml"
|
147
|
+
- ".github/workflows/ruby.yml"
|
147
148
|
- ".gitignore"
|
148
149
|
- ".rspec"
|
150
|
+
- ".ruby-version"
|
149
151
|
- ".travis.yml"
|
150
152
|
- Gemfile
|
151
153
|
- LICENSE
|
@@ -158,6 +160,7 @@ files:
|
|
158
160
|
- lib/yasuri/version.rb
|
159
161
|
- lib/yasuri/yasuri.rb
|
160
162
|
- lib/yasuri/yasuri_links_node.rb
|
163
|
+
- lib/yasuri/yasuri_map_node.rb
|
161
164
|
- lib/yasuri/yasuri_node.rb
|
162
165
|
- lib/yasuri/yasuri_node_generator.rb
|
163
166
|
- lib/yasuri/yasuri_paginate_node.rb
|
@@ -179,6 +182,7 @@ files:
|
|
179
182
|
- spec/servers/httpserver.rb
|
180
183
|
- spec/spec_helper.rb
|
181
184
|
- spec/yasuri_links_node_spec.rb
|
185
|
+
- spec/yasuri_map_spec.rb
|
182
186
|
- spec/yasuri_node_spec.rb
|
183
187
|
- spec/yasuri_paginate_node_spec.rb
|
184
188
|
- spec/yasuri_spec.rb
|
@@ -189,7 +193,7 @@ homepage: https://github.com/tac0x2a/yasuri
|
|
189
193
|
licenses:
|
190
194
|
- MIT
|
191
195
|
metadata: {}
|
192
|
-
post_install_message:
|
196
|
+
post_install_message:
|
193
197
|
rdoc_options: []
|
194
198
|
require_paths:
|
195
199
|
- lib
|
@@ -204,9 +208,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
204
208
|
- !ruby/object:Gem::Version
|
205
209
|
version: '0'
|
206
210
|
requirements: []
|
207
|
-
|
208
|
-
|
209
|
-
signing_key:
|
211
|
+
rubygems_version: 3.2.3
|
212
|
+
signing_key:
|
210
213
|
specification_version: 4
|
211
214
|
summary: Yasuri is easy scraping library.
|
212
215
|
test_files:
|
@@ -226,6 +229,7 @@ test_files:
|
|
226
229
|
- spec/servers/httpserver.rb
|
227
230
|
- spec/spec_helper.rb
|
228
231
|
- spec/yasuri_links_node_spec.rb
|
232
|
+
- spec/yasuri_map_spec.rb
|
229
233
|
- spec/yasuri_node_spec.rb
|
230
234
|
- spec/yasuri_paginate_node_spec.rb
|
231
235
|
- spec/yasuri_spec.rb
|