apify_core 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,24 @@
1
+ {
2
+ "index": {
3
+ "url": ["https://github.com/blog"],
4
+ "js": false,
5
+ "paginate": [
6
+ "\\/?+$",
7
+ "/?page=<% 1,2,1 %>"
8
+ ]
9
+ },
10
+ "posts": {
11
+ "from": "select('h2.blog-post-title a') from('index')",
12
+ "js": false,
13
+ "host": "http://github.com",
14
+ "pattern": {
15
+ "title": "<% .blog-title %>",
16
+ "meta": {
17
+ "calendar": "<% .blog-post-meta li:first %>",
18
+ "author": "<% .blog-post-meta .vcard %>",
19
+ "category": "<% .blog-post-meta li:last %>"
20
+ },
21
+ "body": "<% .blog-post-body %>"
22
+ }
23
+ }
24
+ }
@@ -0,0 +1,21 @@
1
+ {
2
+ "index": {
3
+ "url": [
4
+ "http://oblomoff.com.ua/events/page/2/",
5
+ "http://oblomoff.com.ua/events/"
6
+ ],
7
+ "js": false,
8
+ "host": "http://oblomoff.com.ua/"
9
+ },
10
+ "events": {
11
+ "from": "select('h2.caption a') from('index')",
12
+ "js": false,
13
+ "host": "http://oblomoff.com.ua",
14
+ "pattern": {
15
+ "title": "<% h2.caption %>",
16
+ "date": "<% h3.info %>",
17
+ "images": "<% .story-slider img | mapattr_src %>",
18
+ "description": "<% .text.story | first | html %>"
19
+ }
20
+ }
21
+ }
@@ -0,0 +1,23 @@
1
+ {
2
+ "index": {
3
+ "url": [
4
+ "http://kh.vgorode.ua/ajax/module.aspx?spm_id=2912&city_id=36&d=020115&IsAjax=true&t=635558089790624457&ri=0"
5
+ ],
6
+ "js": false,
7
+ "host": "http://kh.vgorode.ua"
8
+ },
9
+ "events": {
10
+ "from": "select('.art_cell .title a') from('index')",
11
+ "js": false,
12
+ "host": "http://kh.vgorode.ua",
13
+ "pattern": {
14
+ "title": "<% h1 %>",
15
+ "images": "<% .article img | mapattr_src %>",
16
+ "description": "<% .article .body | first | html %>",
17
+ "date": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Когда:'] %>",
18
+ "address": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Где:'] %>",
19
+ "price": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Цена:'] %>",
20
+ "city": "Харьков"
21
+ }
22
+ }
23
+ }
@@ -0,0 +1,23 @@
1
+ {
2
+ "index": {
3
+ "url": [
4
+ "http://kh.vgorode.ua/ajax/module.aspx?spm_id=2912&city_id=36&d=020115&IsAjax=true&t=635558089790624457&ri=0"
5
+ ],
6
+ "js": false,
7
+ "host": "http://kh.vgorode.ua"
8
+ },
9
+ "events": {
10
+ "from": "select('.art_cell .title a') from('index')",
11
+ "js": false,
12
+ "host": "http://kh.vgorode.ua",
13
+ "pattern": {
14
+ "title": "<% h1 %>",
15
+ "images": "<% .article img | mapattr_src %>",
16
+ "description": "<% .article .body | first | html %>",
17
+ "date": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Когда:'] %>",
18
+ "address": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Где:'] %>",
19
+ "price": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Цена:'] %>",
20
+ "city": "Харьков"
21
+ }
22
+ }
23
+ }
@@ -0,0 +1,23 @@
1
+ {
2
+ "index": {
3
+ "url": [
4
+ "http://kh.vgorode.ua/ajax/module.aspx?spm_id=2912&city_id=36&d=020115&IsAjax=true&t=635558089790624457&ri=0"
5
+ ],
6
+ "js": false,
7
+ "host": "http://kh.vgorode.ua"
8
+ },
9
+ "events": {
10
+ "from": "select('.art_cell .title a') from('index')",
11
+ "js": false,
12
+ "host": "http://kh.vgorode.ua",
13
+ "pattern": {
14
+ "title": "<% h1 %>",
15
+ "images": "<% .article img | mapattr_src %>",
16
+ "description": "<% .article .body | first | html %>",
17
+ "date": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Когда:'] %>",
18
+ "address": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Где:'] %>",
19
+ "price": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Цена:'] %>",
20
+ "city": "Харьков"
21
+ }
22
+ }
23
+ }
@@ -0,0 +1,23 @@
1
+ {
2
+ "index": {
3
+ "url": [
4
+ "http://kh.vgorode.ua/ajax/module.aspx?spm_id=2912&city_id=38&d=020115&IsAjax=true&t=635558089790624457&ri=0"
5
+ ],
6
+ "js": false,
7
+ "host": "http://kh.vgorode.ua"
8
+ },
9
+ "events": {
10
+ "from": "select('.art_cell .title a') from('index')",
11
+ "js": false,
12
+ "host": "http://kh.vgorode.ua",
13
+ "pattern": {
14
+ "title": "<% h1 %>",
15
+ "images": "<% .article img | mapattr_src %>",
16
+ "description": "<% .article .body | first | html %>",
17
+ "date": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Когда:'] %>",
18
+ "address": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Где:'] %>",
19
+ "price": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Цена:'] %>",
20
+ "city": "Харьков"
21
+ }
22
+ }
23
+ }
@@ -0,0 +1,23 @@
1
+ {
2
+ "index": {
3
+ "url": [
4
+ "http://kh.vgorode.ua/ajax/module.aspx?spm_id=2912&city_id=36&d=020115&IsAjax=true&t=635558089790624457&ri=0"
5
+ ],
6
+ "js": false,
7
+ "host": "http://kh.vgorode.ua"
8
+ },
9
+ "events": {
10
+ "from": "select('.art_cell .title a') from('index')",
11
+ "js": false,
12
+ "host": "http://kh.vgorode.ua",
13
+ "pattern": {
14
+ "title": "<% h1 %>",
15
+ "images": "<% .article img | mapattr_src %>",
16
+ "description": "<% .article .body | first | html %>",
17
+ "date": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Когда:'] %>",
18
+ "address": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Где:'] %>",
19
+ "price": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Цена:'] %>",
20
+ "city": "Харьков"
21
+ }
22
+ }
23
+ }
@@ -0,0 +1,23 @@
1
+ {
2
+ "index": {
3
+ "url": [
4
+ "http://kh.vgorode.ua/ajax/module.aspx?spm_id=2912&city_id=36&d=020115&IsAjax=true&t=635558089790624457&ri=0"
5
+ ],
6
+ "js": false,
7
+ "host": "http://kh.vgorode.ua"
8
+ },
9
+ "events": {
10
+ "from": "select('.art_cell .title a') from('index')",
11
+ "js": false,
12
+ "host": "http://kh.vgorode.ua",
13
+ "pattern": {
14
+ "title": "<% h1 %>",
15
+ "images": "<% .article img | mapattr_src %>",
16
+ "description": "<% .article .body | first | html %>",
17
+ "date": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Когда:'] %>",
18
+ "address": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Где:'] %>",
19
+ "price": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Цена:'] %>",
20
+ "city": "Харьков"
21
+ }
22
+ }
23
+ }
@@ -0,0 +1,23 @@
1
+ {
2
+ "index": {
3
+ "url": [
4
+ "http://kh.vgorode.ua/ajax/module.aspx?spm_id=2912&city_id=36&d=020115&IsAjax=true&t=635558089790624457&ri=0"
5
+ ],
6
+ "js": false,
7
+ "host": "http://kh.vgorode.ua"
8
+ },
9
+ "events": {
10
+ "from": "select('.art_cell .title a') from('index')",
11
+ "js": false,
12
+ "host": "http://kh.vgorode.ua",
13
+ "pattern": {
14
+ "title": "<% h1 %>",
15
+ "images": "<% .article img | mapattr_src %>",
16
+ "description": "<% .article .body | first | html %>",
17
+ "date": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Когда:'] %>",
18
+ "address": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Где:'] %>",
19
+ "price": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Цена:'] %>",
20
+ "city": "Харьков"
21
+ }
22
+ }
23
+ }
@@ -0,0 +1,23 @@
1
+ {
2
+ "index": {
3
+ "url": [
4
+ "http://kh.vgorode.ua/ajax/module.aspx?spm_id=2912&city_id=36&d=020115&IsAjax=true&t=635558089790624457&ri=0"
5
+ ],
6
+ "js": false,
7
+ "host": "http://kh.vgorode.ua"
8
+ },
9
+ "events": {
10
+ "from": "select('.art_cell .title a') from('index')",
11
+ "js": false,
12
+ "host": "http://kh.vgorode.ua",
13
+ "pattern": {
14
+ "title": "<% h1 %>",
15
+ "images": "<% .article img | mapattr_src %>",
16
+ "description": "<% .article .body | first | html %>",
17
+ "date": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Когда:'] %>",
18
+ "address": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Где:'] %>",
19
+ "price": "<% //div[@class='col-sm-10' and ../div[@class='col-sm-2']/text() = 'Цена:'] %>",
20
+ "city": "Харьков"
21
+ }
22
+ }
23
+ }
@@ -0,0 +1,8 @@
1
+ require 'bundler/setup'
2
+ Bundler.require
3
+
4
+ require 'apify_core'
5
+
6
+ RSpec.configure do |config|
7
+
8
+ end
metadata ADDED
@@ -0,0 +1,247 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: apify_core
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - victorvsk
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 3.0.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 3.0.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: watir-webdriver
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rest_client
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: headless
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: parallel
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: nokogiri
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 1.6.5
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 1.6.5
125
+ - !ruby/object:Gem::Dependency
126
+ name: json
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: activesupport
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ description: Simple API for transforming anything from a single HTML page to an entire website into
154
+ JSON.
155
+ email:
156
+ - victor@vyskrebentsev.ru
157
+ executables:
158
+ - bundler
159
+ - coderay
160
+ - htmldiff
161
+ - ldiff
162
+ - nokogiri
163
+ - pry
164
+ - rackup
165
+ - rake
166
+ - rspec
167
+ - server
168
+ - tilt
169
+ extensions: []
170
+ extra_rdoc_files: []
171
+ files:
172
+ - Gemfile
173
+ - Gemfile.lock
174
+ - LICENSE.txt
175
+ - README.md
176
+ - Rakefile
177
+ - apify_core.gemspec
178
+ - bin/bundler
179
+ - bin/coderay
180
+ - bin/htmldiff
181
+ - bin/ldiff
182
+ - bin/nokogiri
183
+ - bin/pry
184
+ - bin/rackup
185
+ - bin/rake
186
+ - bin/rspec
187
+ - bin/server
188
+ - bin/tilt
189
+ - lib/apify_core.rb
190
+ - lib/apify_core/fetcher.rb
191
+ - lib/apify_core/filter.rb
192
+ - lib/apify_core/parser.rb
193
+ - lib/apify_core/version.rb
194
+ - spec/complex_spec.rb
195
+ - spec/examples/apify_request.json
196
+ - spec/examples/apify_response.json
197
+ - spec/examples/github_blog_request.json
198
+ - spec/examples/oblomoff_events_request.json
199
+ - spec/examples/vgorode_dn_events_request.json
200
+ - spec/examples/vgorode_dp_events_request.json
201
+ - spec/examples/vgorode_kh_events_request.json
202
+ - spec/examples/vgorode_kiev_events_request.json
203
+ - spec/examples/vgorode_lg_events_request.json
204
+ - spec/examples/vgorode_lviv_events_request.json
205
+ - spec/examples/vgorode_od_events_request.json
206
+ - spec/examples/vgorode_zp_events_request.json
207
+ - spec/spec_helper.rb
208
+ homepage: ''
209
+ licenses:
210
+ - MIT
211
+ metadata: {}
212
+ post_install_message:
213
+ rdoc_options: []
214
+ require_paths:
215
+ - lib
216
+ required_ruby_version: !ruby/object:Gem::Requirement
217
+ requirements:
218
+ - - ">="
219
+ - !ruby/object:Gem::Version
220
+ version: '0'
221
+ required_rubygems_version: !ruby/object:Gem::Requirement
222
+ requirements:
223
+ - - ">="
224
+ - !ruby/object:Gem::Version
225
+ version: '0'
226
+ requirements: []
227
+ rubyforge_project:
228
+ rubygems_version: 2.4.3
229
+ signing_key:
230
+ specification_version: 4
231
+ summary: Core part of the Apify project. An easy way to parse HTML/XML content and crawl
232
+ websites in a normalized and centralized way.
233
+ test_files:
234
+ - spec/complex_spec.rb
235
+ - spec/examples/apify_request.json
236
+ - spec/examples/apify_response.json
237
+ - spec/examples/github_blog_request.json
238
+ - spec/examples/oblomoff_events_request.json
239
+ - spec/examples/vgorode_dn_events_request.json
240
+ - spec/examples/vgorode_dp_events_request.json
241
+ - spec/examples/vgorode_kh_events_request.json
242
+ - spec/examples/vgorode_kiev_events_request.json
243
+ - spec/examples/vgorode_lg_events_request.json
244
+ - spec/examples/vgorode_lviv_events_request.json
245
+ - spec/examples/vgorode_od_events_request.json
246
+ - spec/examples/vgorode_zp_events_request.json
247
+ - spec/spec_helper.rb