answersengine 0.10.1 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CODE_OF_CONDUCT.md +1 -1
- data/LICENSE.txt +1 -1
- data/README.md +3 -4
- data/answersengine.gemspec +6 -12
- data/exe/answersengine +3 -2
- data/lib/answersengine.rb +20 -3
- metadata +14 -152
- data/examples/fetchtest/libraries/hello.rb +0 -9
- data/examples/fetchtest/libraries/hello_fail.rb +0 -10
- data/examples/fetchtest/parsers/failed.rb +0 -2
- data/examples/fetchtest/parsers/find_outputs.rb +0 -18
- data/examples/fetchtest/parsers/home.rb +0 -50
- data/examples/fetchtest/parsers/nested_fail.rb +0 -3
- data/examples/fetchtest/parsers/simple.rb +0 -14
- data/examples/fetchtest/seeders/csv_seeder.rb +0 -12
- data/examples/fetchtest/seeders/failed.rb +0 -1
- data/examples/fetchtest/seeders/list_of_urls.csv +0 -5
- data/examples/fetchtest/seeders/seed.rb +0 -28
- data/examples/fetchtest/seeders/test_reset_page.rb +0 -4
- data/lib/answersengine/cli.rb +0 -45
- data/lib/answersengine/cli/env_var.rb +0 -48
- data/lib/answersengine/cli/finisher.rb +0 -40
- data/lib/answersengine/cli/global_page.rb +0 -39
- data/lib/answersengine/cli/job.rb +0 -30
- data/lib/answersengine/cli/job_output.rb +0 -69
- data/lib/answersengine/cli/parser.rb +0 -64
- data/lib/answersengine/cli/scraper.rb +0 -185
- data/lib/answersengine/cli/scraper_deployment.rb +0 -24
- data/lib/answersengine/cli/scraper_export.rb +0 -51
- data/lib/answersengine/cli/scraper_exporter.rb +0 -40
- data/lib/answersengine/cli/scraper_finisher.rb +0 -20
- data/lib/answersengine/cli/scraper_job.rb +0 -75
- data/lib/answersengine/cli/scraper_job_var.rb +0 -48
- data/lib/answersengine/cli/scraper_page.rb +0 -203
- data/lib/answersengine/cli/scraper_var.rb +0 -48
- data/lib/answersengine/cli/seeder.rb +0 -40
- data/lib/answersengine/client.rb +0 -29
- data/lib/answersengine/client/auth_token.rb +0 -50
- data/lib/answersengine/client/backblaze_content.rb +0 -45
- data/lib/answersengine/client/base.rb +0 -55
- data/lib/answersengine/client/deploy_key.rb +0 -21
- data/lib/answersengine/client/env_var.rb +0 -28
- data/lib/answersengine/client/export.rb +0 -10
- data/lib/answersengine/client/global_page.rb +0 -18
- data/lib/answersengine/client/job.rb +0 -64
- data/lib/answersengine/client/job_export.rb +0 -10
- data/lib/answersengine/client/job_log.rb +0 -26
- data/lib/answersengine/client/job_output.rb +0 -19
- data/lib/answersengine/client/job_page.rb +0 -58
- data/lib/answersengine/client/job_stat.rb +0 -16
- data/lib/answersengine/client/scraper.rb +0 -57
- data/lib/answersengine/client/scraper_deployment.rb +0 -18
- data/lib/answersengine/client/scraper_export.rb +0 -22
- data/lib/answersengine/client/scraper_exporter.rb +0 -14
- data/lib/answersengine/client/scraper_finisher.rb +0 -16
- data/lib/answersengine/client/scraper_job.rb +0 -49
- data/lib/answersengine/client/scraper_job_output.rb +0 -19
- data/lib/answersengine/client/scraper_job_page.rb +0 -67
- data/lib/answersengine/client/scraper_job_var.rb +0 -28
- data/lib/answersengine/client/scraper_var.rb +0 -28
- data/lib/answersengine/plugin.rb +0 -6
- data/lib/answersengine/plugin/context_exposer.rb +0 -55
- data/lib/answersengine/scraper.rb +0 -18
- data/lib/answersengine/scraper/executor.rb +0 -373
- data/lib/answersengine/scraper/finisher.rb +0 -18
- data/lib/answersengine/scraper/parser.rb +0 -18
- data/lib/answersengine/scraper/ruby_finisher_executor.rb +0 -116
- data/lib/answersengine/scraper/ruby_parser_executor.rb +0 -200
- data/lib/answersengine/scraper/ruby_seeder_executor.rb +0 -120
- data/lib/answersengine/scraper/seeder.rb +0 -18
- data/lib/answersengine/version.rb +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e7a6b853369f925f1537eb535fc8ebb3dd45fc82195b67bdd68db6452fee5eea
|
4
|
+
data.tar.gz: 84616bf96239a2cf6718e98c046bebdc849a1b527a04ff1363dbc2bfb466804a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0a10c9b24b90c2224a212fdebb48cef6d20f67cae6e468202c0b91923ac237dc88456e2159276986d180ec0e214b066e1fe9ffbfaed3b040c0a6c7a23723011
|
7
|
+
data.tar.gz: 0f00ab279ae62d1226f2c4db7fc3dee173d2e255ac7abf67d66f4c4a1ccd1dc031e66a1336e92645ad666b04128ea3b08c636c546132918ed2bf161f62ed6ccb
|
data/CODE_OF_CONDUCT.md
CHANGED
@@ -55,7 +55,7 @@ further defined and clarified by project maintainers.
|
|
55
55
|
## Enforcement
|
56
56
|
|
57
57
|
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
58
|
-
reported by contacting the project team at
|
58
|
+
reported by contacting the project team at perry@datahen.com. All
|
59
59
|
complaints will be reviewed and investigated and will result in a response that
|
60
60
|
is deemed necessary and appropriate to the circumstances. The project team is
|
61
61
|
obligated to maintain confidentiality with regard to the reporter of an incident.
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
# AnswersEngine
|
1
|
+
# AnswersEngine (Deprecated)
|
2
2
|
|
3
|
-
Welcome to the AnswersEngine gem, this gem includes the client and the CLI to be able to integrate with
|
3
|
+
Welcome to the AnswersEngine gem, this gem includes the client and the CLI as an alias to [datahen](https://github.com/DataHenOfficial/datahen-ruby) gem client and CLI, to be able to integrate with datahen.com. It has been deprecated, please use [datahen](https://github.com/DataHenOfficial/datahen-ruby) gem instead.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -22,9 +22,8 @@ Or install it yourself as:
|
|
22
22
|
|
23
23
|
## Contributing
|
24
24
|
|
25
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
25
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/DataHenOfficial/datahen-ruby. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
26
26
|
|
27
27
|
## License
|
28
28
|
|
29
29
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
30
|
-
|
data/answersengine.gemspec
CHANGED
@@ -1,17 +1,16 @@
|
|
1
1
|
|
2
2
|
lib = File.expand_path("../lib", __FILE__)
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require "answersengine/version"
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
6
|
spec.name = "answersengine"
|
8
|
-
spec.version =
|
7
|
+
spec.version = "0.10.2"
|
9
8
|
spec.authors = ["Parama Danoesubroto"]
|
10
|
-
spec.email = ["
|
9
|
+
spec.email = ["perry@datahen.com"]
|
11
10
|
|
12
|
-
spec.summary = %q{
|
13
|
-
spec.description = %q{
|
14
|
-
spec.homepage = "https://
|
11
|
+
spec.summary = %q{(Deprecated: Use datahen gem instead.) Compatibility alias for DataHen toolbelt for developers.}
|
12
|
+
spec.description = %q{(Deprecated: Use datahen gem instead.) Compatibility alias for DataHen toolbelt to develop scrapers and other scripts.}
|
13
|
+
spec.homepage = "https://datahen.com"
|
15
14
|
spec.license = "MIT"
|
16
15
|
|
17
16
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
@@ -34,14 +33,9 @@ Gem::Specification.new do |spec|
|
|
34
33
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
35
34
|
spec.require_paths = ["lib"]
|
36
35
|
spec.required_ruby_version = '>= 2.2.2'
|
37
|
-
spec.add_dependency "
|
38
|
-
spec.add_dependency 'httparty', '~> 0.16.2'
|
39
|
-
spec.add_dependency 'nokogiri', '~> 1.6', '< 1.10'
|
36
|
+
spec.add_dependency "datahen", ">= 0"
|
40
37
|
spec.add_development_dependency 'bundler', '>= 1.16'
|
41
38
|
spec.add_development_dependency 'rake', '>= 10.0'
|
42
39
|
spec.add_development_dependency 'minitest', '>= 5.11'
|
43
|
-
spec.add_development_dependency 'simplecov', '>= 0.16.1'
|
44
|
-
spec.add_development_dependency 'simplecov-console', '>= 0.4.2'
|
45
|
-
spec.add_development_dependency 'timecop', '>= 0.9.1'
|
46
40
|
spec.add_development_dependency 'byebug', '>= 0'
|
47
41
|
end
|
data/exe/answersengine
CHANGED
data/lib/answersengine.rb
CHANGED
@@ -1,5 +1,22 @@
|
|
1
|
-
require "
|
2
|
-
require "answersengine/scraper"
|
1
|
+
require "datahen"
|
3
2
|
|
4
|
-
|
3
|
+
# Override env variables methods to include `ANSWERSENGINE_*` env variables
|
4
|
+
module Datahen
|
5
|
+
module Client
|
6
|
+
class Base
|
7
|
+
def self.env_auth_token
|
8
|
+
ENV['DATAHEN_TOKEN'].nil? ? ENV['ANSWERSENGINE_TOKEN'] : ENV['DATAHEN_TOKEN']
|
9
|
+
end
|
10
|
+
|
11
|
+
def env_api_url
|
12
|
+
return ENV['DATAHEN_API_URL'] unless ENV['DATAHEN_API_URL'].nil?
|
13
|
+
ENV['ANSWERSENGINE_API_URL'].nil? ? 'https://app.datahen.com/api/v1' : ENV['ANSWERSENGINE_API_URL']
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
5
17
|
end
|
18
|
+
|
19
|
+
ENV['ANSWERSENGINE_TOKEN'] = ENV['DATAHEN_TOKEN'] if ENV['ANSWERSENGINE_TOKEN'].to_s.strip == ''
|
20
|
+
|
21
|
+
# (Deprecated) Alias to Datahen module.
|
22
|
+
AnswersEngine = ::Datahen
|
metadata
CHANGED
@@ -1,63 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: answersengine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.
|
4
|
+
version: 0.10.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: datahen
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 0.20.3
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 0.20.3
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: httparty
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 0.16.2
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: 0.16.2
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: nokogiri
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '1.6'
|
48
|
-
- - "<"
|
17
|
+
- - ">="
|
49
18
|
- !ruby/object:Gem::Version
|
50
|
-
version: '
|
19
|
+
version: '0'
|
51
20
|
type: :runtime
|
52
21
|
prerelease: false
|
53
22
|
version_requirements: !ruby/object:Gem::Requirement
|
54
23
|
requirements:
|
55
|
-
- - "
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
version: '1.6'
|
58
|
-
- - "<"
|
24
|
+
- - ">="
|
59
25
|
- !ruby/object:Gem::Version
|
60
|
-
version: '
|
26
|
+
version: '0'
|
61
27
|
- !ruby/object:Gem::Dependency
|
62
28
|
name: bundler
|
63
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -100,48 +66,6 @@ dependencies:
|
|
100
66
|
- - ">="
|
101
67
|
- !ruby/object:Gem::Version
|
102
68
|
version: '5.11'
|
103
|
-
- !ruby/object:Gem::Dependency
|
104
|
-
name: simplecov
|
105
|
-
requirement: !ruby/object:Gem::Requirement
|
106
|
-
requirements:
|
107
|
-
- - ">="
|
108
|
-
- !ruby/object:Gem::Version
|
109
|
-
version: 0.16.1
|
110
|
-
type: :development
|
111
|
-
prerelease: false
|
112
|
-
version_requirements: !ruby/object:Gem::Requirement
|
113
|
-
requirements:
|
114
|
-
- - ">="
|
115
|
-
- !ruby/object:Gem::Version
|
116
|
-
version: 0.16.1
|
117
|
-
- !ruby/object:Gem::Dependency
|
118
|
-
name: simplecov-console
|
119
|
-
requirement: !ruby/object:Gem::Requirement
|
120
|
-
requirements:
|
121
|
-
- - ">="
|
122
|
-
- !ruby/object:Gem::Version
|
123
|
-
version: 0.4.2
|
124
|
-
type: :development
|
125
|
-
prerelease: false
|
126
|
-
version_requirements: !ruby/object:Gem::Requirement
|
127
|
-
requirements:
|
128
|
-
- - ">="
|
129
|
-
- !ruby/object:Gem::Version
|
130
|
-
version: 0.4.2
|
131
|
-
- !ruby/object:Gem::Dependency
|
132
|
-
name: timecop
|
133
|
-
requirement: !ruby/object:Gem::Requirement
|
134
|
-
requirements:
|
135
|
-
- - ">="
|
136
|
-
- !ruby/object:Gem::Version
|
137
|
-
version: 0.9.1
|
138
|
-
type: :development
|
139
|
-
prerelease: false
|
140
|
-
version_requirements: !ruby/object:Gem::Requirement
|
141
|
-
requirements:
|
142
|
-
- - ">="
|
143
|
-
- !ruby/object:Gem::Version
|
144
|
-
version: 0.9.1
|
145
69
|
- !ruby/object:Gem::Dependency
|
146
70
|
name: byebug
|
147
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -156,9 +80,10 @@ dependencies:
|
|
156
80
|
- - ">="
|
157
81
|
- !ruby/object:Gem::Version
|
158
82
|
version: '0'
|
159
|
-
description:
|
83
|
+
description: "(Deprecated: Use datahen gem instead.) Compatibility alias for DataHen
|
84
|
+
toolbelt to develop scrapers and other scripts."
|
160
85
|
email:
|
161
|
-
-
|
86
|
+
- perry@datahen.com
|
162
87
|
executables:
|
163
88
|
- answersengine
|
164
89
|
extensions: []
|
@@ -174,78 +99,14 @@ files:
|
|
174
99
|
- answersengine.gemspec
|
175
100
|
- bin/console
|
176
101
|
- bin/setup
|
177
|
-
- examples/fetchtest/libraries/hello.rb
|
178
|
-
- examples/fetchtest/libraries/hello_fail.rb
|
179
|
-
- examples/fetchtest/parsers/failed.rb
|
180
|
-
- examples/fetchtest/parsers/find_outputs.rb
|
181
|
-
- examples/fetchtest/parsers/home.rb
|
182
|
-
- examples/fetchtest/parsers/nested_fail.rb
|
183
|
-
- examples/fetchtest/parsers/simple.rb
|
184
|
-
- examples/fetchtest/seeders/csv_seeder.rb
|
185
|
-
- examples/fetchtest/seeders/failed.rb
|
186
|
-
- examples/fetchtest/seeders/list_of_urls.csv
|
187
|
-
- examples/fetchtest/seeders/seed.rb
|
188
|
-
- examples/fetchtest/seeders/test_reset_page.rb
|
189
102
|
- exe/answersengine
|
190
103
|
- lib/answersengine.rb
|
191
|
-
|
192
|
-
- lib/answersengine/cli/env_var.rb
|
193
|
-
- lib/answersengine/cli/finisher.rb
|
194
|
-
- lib/answersengine/cli/global_page.rb
|
195
|
-
- lib/answersengine/cli/job.rb
|
196
|
-
- lib/answersengine/cli/job_output.rb
|
197
|
-
- lib/answersengine/cli/parser.rb
|
198
|
-
- lib/answersengine/cli/scraper.rb
|
199
|
-
- lib/answersengine/cli/scraper_deployment.rb
|
200
|
-
- lib/answersengine/cli/scraper_export.rb
|
201
|
-
- lib/answersengine/cli/scraper_exporter.rb
|
202
|
-
- lib/answersengine/cli/scraper_finisher.rb
|
203
|
-
- lib/answersengine/cli/scraper_job.rb
|
204
|
-
- lib/answersengine/cli/scraper_job_var.rb
|
205
|
-
- lib/answersengine/cli/scraper_page.rb
|
206
|
-
- lib/answersengine/cli/scraper_var.rb
|
207
|
-
- lib/answersengine/cli/seeder.rb
|
208
|
-
- lib/answersengine/client.rb
|
209
|
-
- lib/answersengine/client/auth_token.rb
|
210
|
-
- lib/answersengine/client/backblaze_content.rb
|
211
|
-
- lib/answersengine/client/base.rb
|
212
|
-
- lib/answersengine/client/deploy_key.rb
|
213
|
-
- lib/answersengine/client/env_var.rb
|
214
|
-
- lib/answersengine/client/export.rb
|
215
|
-
- lib/answersengine/client/global_page.rb
|
216
|
-
- lib/answersengine/client/job.rb
|
217
|
-
- lib/answersengine/client/job_export.rb
|
218
|
-
- lib/answersengine/client/job_log.rb
|
219
|
-
- lib/answersengine/client/job_output.rb
|
220
|
-
- lib/answersengine/client/job_page.rb
|
221
|
-
- lib/answersengine/client/job_stat.rb
|
222
|
-
- lib/answersengine/client/scraper.rb
|
223
|
-
- lib/answersengine/client/scraper_deployment.rb
|
224
|
-
- lib/answersengine/client/scraper_export.rb
|
225
|
-
- lib/answersengine/client/scraper_exporter.rb
|
226
|
-
- lib/answersengine/client/scraper_finisher.rb
|
227
|
-
- lib/answersengine/client/scraper_job.rb
|
228
|
-
- lib/answersengine/client/scraper_job_output.rb
|
229
|
-
- lib/answersengine/client/scraper_job_page.rb
|
230
|
-
- lib/answersengine/client/scraper_job_var.rb
|
231
|
-
- lib/answersengine/client/scraper_var.rb
|
232
|
-
- lib/answersengine/plugin.rb
|
233
|
-
- lib/answersengine/plugin/context_exposer.rb
|
234
|
-
- lib/answersengine/scraper.rb
|
235
|
-
- lib/answersengine/scraper/executor.rb
|
236
|
-
- lib/answersengine/scraper/finisher.rb
|
237
|
-
- lib/answersengine/scraper/parser.rb
|
238
|
-
- lib/answersengine/scraper/ruby_finisher_executor.rb
|
239
|
-
- lib/answersengine/scraper/ruby_parser_executor.rb
|
240
|
-
- lib/answersengine/scraper/ruby_seeder_executor.rb
|
241
|
-
- lib/answersengine/scraper/seeder.rb
|
242
|
-
- lib/answersengine/version.rb
|
243
|
-
homepage: https://answersengine.com
|
104
|
+
homepage: https://datahen.com
|
244
105
|
licenses:
|
245
106
|
- MIT
|
246
107
|
metadata:
|
247
108
|
allowed_push_host: https://rubygems.org
|
248
|
-
homepage_uri: https://
|
109
|
+
homepage_uri: https://datahen.com
|
249
110
|
source_code_uri: https://github.com/answersengine/answersengine
|
250
111
|
post_install_message:
|
251
112
|
rdoc_options: []
|
@@ -266,5 +127,6 @@ rubyforge_project:
|
|
266
127
|
rubygems_version: 2.7.6
|
267
128
|
signing_key:
|
268
129
|
specification_version: 4
|
269
|
-
summary:
|
130
|
+
summary: "(Deprecated: Use datahen gem instead.) Compatibility alias for DataHen toolbelt
|
131
|
+
for developers."
|
270
132
|
test_files: []
|
@@ -1,18 +0,0 @@
|
|
1
|
-
|
2
|
-
puts
|
3
|
-
puts "list all output on a collection"
|
4
|
-
puts find_outputs('home', {},2).count
|
5
|
-
puts find_outputs('home', {})
|
6
|
-
|
7
|
-
puts "find_outputs"
|
8
|
-
puts find_outputs('home', "_id": "b3d6f737731842b2be198fc3a85283b7")
|
9
|
-
puts
|
10
|
-
puts "find_outputs not found"
|
11
|
-
puts find_outputs('home', "_id": "b3d6f737731842b2be198fc3a85283b7--").inspect
|
12
|
-
# puts nil['_collection']
|
13
|
-
puts
|
14
|
-
puts "find_output"
|
15
|
-
puts find_output('home',"_id": "b3d6f737731842b2be198fc3a85283b7")['_collection']
|
16
|
-
puts
|
17
|
-
puts "find_output not found"
|
18
|
-
puts find_output('home',"_id": "b3d6f737731842b2be198fc3a85283b7--").inspect
|
@@ -1,50 +0,0 @@
|
|
1
|
-
puts `pwd`
|
2
|
-
|
3
|
-
require './libraries/hello'
|
4
|
-
|
5
|
-
hello = Hello.new
|
6
|
-
puts "hello say #{hello.say}"
|
7
|
-
puts "page gid:#{page['gid']}"
|
8
|
-
puts "page #{page}"
|
9
|
-
|
10
|
-
puts "content #{content}"
|
11
|
-
|
12
|
-
nokogiri = Nokogiri.HTML(content)
|
13
|
-
|
14
|
-
h1 = nokogiri.at('h1')
|
15
|
-
heading = h1.nil? ? '' : h1.text
|
16
|
-
text = nokogiri.text
|
17
|
-
|
18
|
-
doc1 = {
|
19
|
-
_collection: "home",
|
20
|
-
# _id: "1234",
|
21
|
-
text: text,
|
22
|
-
heading: heading,
|
23
|
-
response_headers: page['response_headers'],
|
24
|
-
some_vars: page['vars']
|
25
|
-
# url: page.url
|
26
|
-
}
|
27
|
-
doc2 = {
|
28
|
-
_collection: "home",
|
29
|
-
# _id: "12345",
|
30
|
-
text: text,
|
31
|
-
heading: heading,
|
32
|
-
response_headers: page['response_headers'],
|
33
|
-
some_vars: page['vars']
|
34
|
-
# url: page.url
|
35
|
-
}
|
36
|
-
|
37
|
-
|
38
|
-
outputs << doc1
|
39
|
-
outputs << {}
|
40
|
-
outputs << doc2
|
41
|
-
|
42
|
-
|
43
|
-
pages << {
|
44
|
-
url: "http://fetchtest.datahen.com/statuses/200?q=queuedFromParserWithVars",
|
45
|
-
vars: {"abc":[1,2,3], "def": "defcontent"}
|
46
|
-
}
|
47
|
-
|
48
|
-
puts "inspect page: #{page}"
|
49
|
-
|
50
|
-
puts "inspect vars: #{page['vars']}"
|
@@ -1,14 +0,0 @@
|
|
1
|
-
nokogiri = Nokogiri.HTML(content)
|
2
|
-
|
3
|
-
outputs << {
|
4
|
-
_collection: "home",
|
5
|
-
_id: "1234",
|
6
|
-
text: nokogiri.text,
|
7
|
-
heading: nokogiri.at('h1').text,
|
8
|
-
response_headers: page['response_headers'],
|
9
|
-
}
|
10
|
-
|
11
|
-
pages << {
|
12
|
-
url: "http://fetchtest.datahen.com/statuses/200?q=queuedFromParser",
|
13
|
-
vars: {"abc":[1,2,3], "def": "defcontent"}
|
14
|
-
}
|