datahen 0.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78):
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.travis.yml +7 -0
  4. data/CODE_OF_CONDUCT.md +74 -0
  5. data/Gemfile +6 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +29 -0
  8. data/Rakefile +22 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/datahen.gemspec +47 -0
  12. data/examples/fetchtest/libraries/hello.rb +9 -0
  13. data/examples/fetchtest/libraries/hello_fail.rb +10 -0
  14. data/examples/fetchtest/parsers/failed.rb +2 -0
  15. data/examples/fetchtest/parsers/find_outputs.rb +18 -0
  16. data/examples/fetchtest/parsers/home.rb +50 -0
  17. data/examples/fetchtest/parsers/nested_fail.rb +3 -0
  18. data/examples/fetchtest/parsers/simple.rb +14 -0
  19. data/examples/fetchtest/seeders/csv_seeder.rb +12 -0
  20. data/examples/fetchtest/seeders/failed.rb +1 -0
  21. data/examples/fetchtest/seeders/list_of_urls.csv +5 -0
  22. data/examples/fetchtest/seeders/seed.rb +28 -0
  23. data/examples/fetchtest/seeders/test_reset_page.rb +4 -0
  24. data/exe/hen +3 -0
  25. data/lib/datahen.rb +5 -0
  26. data/lib/datahen/cli.rb +45 -0
  27. data/lib/datahen/cli/env_var.rb +48 -0
  28. data/lib/datahen/cli/finisher.rb +40 -0
  29. data/lib/datahen/cli/global_page.rb +39 -0
  30. data/lib/datahen/cli/job.rb +30 -0
  31. data/lib/datahen/cli/job_output.rb +69 -0
  32. data/lib/datahen/cli/parser.rb +64 -0
  33. data/lib/datahen/cli/scraper.rb +185 -0
  34. data/lib/datahen/cli/scraper_deployment.rb +24 -0
  35. data/lib/datahen/cli/scraper_export.rb +51 -0
  36. data/lib/datahen/cli/scraper_exporter.rb +40 -0
  37. data/lib/datahen/cli/scraper_finisher.rb +20 -0
  38. data/lib/datahen/cli/scraper_job.rb +75 -0
  39. data/lib/datahen/cli/scraper_job_var.rb +48 -0
  40. data/lib/datahen/cli/scraper_page.rb +203 -0
  41. data/lib/datahen/cli/scraper_var.rb +48 -0
  42. data/lib/datahen/cli/seeder.rb +40 -0
  43. data/lib/datahen/client.rb +29 -0
  44. data/lib/datahen/client/auth_token.rb +50 -0
  45. data/lib/datahen/client/backblaze_content.rb +45 -0
  46. data/lib/datahen/client/base.rb +69 -0
  47. data/lib/datahen/client/deploy_key.rb +21 -0
  48. data/lib/datahen/client/env_var.rb +28 -0
  49. data/lib/datahen/client/export.rb +10 -0
  50. data/lib/datahen/client/global_page.rb +18 -0
  51. data/lib/datahen/client/job.rb +64 -0
  52. data/lib/datahen/client/job_export.rb +10 -0
  53. data/lib/datahen/client/job_log.rb +26 -0
  54. data/lib/datahen/client/job_output.rb +19 -0
  55. data/lib/datahen/client/job_page.rb +58 -0
  56. data/lib/datahen/client/job_stat.rb +16 -0
  57. data/lib/datahen/client/scraper.rb +57 -0
  58. data/lib/datahen/client/scraper_deployment.rb +18 -0
  59. data/lib/datahen/client/scraper_export.rb +22 -0
  60. data/lib/datahen/client/scraper_exporter.rb +14 -0
  61. data/lib/datahen/client/scraper_finisher.rb +16 -0
  62. data/lib/datahen/client/scraper_job.rb +49 -0
  63. data/lib/datahen/client/scraper_job_output.rb +19 -0
  64. data/lib/datahen/client/scraper_job_page.rb +67 -0
  65. data/lib/datahen/client/scraper_job_var.rb +28 -0
  66. data/lib/datahen/client/scraper_var.rb +28 -0
  67. data/lib/datahen/plugin.rb +6 -0
  68. data/lib/datahen/plugin/context_exposer.rb +55 -0
  69. data/lib/datahen/scraper.rb +18 -0
  70. data/lib/datahen/scraper/executor.rb +373 -0
  71. data/lib/datahen/scraper/finisher.rb +18 -0
  72. data/lib/datahen/scraper/parser.rb +18 -0
  73. data/lib/datahen/scraper/ruby_finisher_executor.rb +116 -0
  74. data/lib/datahen/scraper/ruby_parser_executor.rb +200 -0
  75. data/lib/datahen/scraper/ruby_seeder_executor.rb +120 -0
  76. data/lib/datahen/scraper/seeder.rb +18 -0
  77. data/lib/datahen/version.rb +3 -0
  78. metadata +270 -0
data/lib/datahen/scraper/seeder.rb ADDED
@@ -0,0 +1,18 @@
1
+ module Datahen
2
+ module Scraper
3
+ class Seeder
4
+
5
+ def self.exec_seeder(filename, job_id=nil, save=false)
6
+ extname = File.extname(filename)
7
+ case extname
8
+ when '.rb'
9
+ executor = RubySeederExecutor.new(filename: filename, job_id: job_id)
10
+ executor.exec_seeder(save)
11
+ else
12
+ puts "Unable to find a seeder executor for file type \"#{extname}\""
13
+ end
14
+ end
15
+
16
+ end
17
+ end
18
+ end
data/lib/datahen/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module Datahen
2
+ VERSION = "0.10.4"
3
+ end
metadata ADDED
@@ -0,0 +1,270 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: datahen
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.10.4
5
+ platform: ruby
6
+ authors:
7
+ - Parama Danoesubroto
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-12-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: thor
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.20.3
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.20.3
27
+ - !ruby/object:Gem::Dependency
28
+ name: httparty
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.16.2
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.16.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.6'
48
+ - - "<"
49
+ - !ruby/object:Gem::Version
50
+ version: '1.10'
51
+ type: :runtime
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - "~>"
56
+ - !ruby/object:Gem::Version
57
+ version: '1.6'
58
+ - - "<"
59
+ - !ruby/object:Gem::Version
60
+ version: '1.10'
61
+ - !ruby/object:Gem::Dependency
62
+ name: bundler
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '1.16'
68
+ type: :development
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '1.16'
75
+ - !ruby/object:Gem::Dependency
76
+ name: rake
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '10.0'
82
+ type: :development
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '10.0'
89
+ - !ruby/object:Gem::Dependency
90
+ name: minitest
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '5.11'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '5.11'
103
+ - !ruby/object:Gem::Dependency
104
+ name: simplecov
105
+ requirement: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: 0.16.1
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: 0.16.1
117
+ - !ruby/object:Gem::Dependency
118
+ name: simplecov-console
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: 0.4.2
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: 0.4.2
131
+ - !ruby/object:Gem::Dependency
132
+ name: timecop
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: 0.9.1
138
+ type: :development
139
+ prerelease: false
140
+ version_requirements: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: 0.9.1
145
+ - !ruby/object:Gem::Dependency
146
+ name: byebug
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - ">="
150
+ - !ruby/object:Gem::Version
151
+ version: '0'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: '0'
159
+ description: DataHen toolbelt to develop scrapers and other scripts
160
+ email:
161
+ - parama@datahen.com
162
+ executables:
163
+ - hen
164
+ extensions: []
165
+ extra_rdoc_files: []
166
+ files:
167
+ - ".gitignore"
168
+ - ".travis.yml"
169
+ - CODE_OF_CONDUCT.md
170
+ - Gemfile
171
+ - LICENSE.txt
172
+ - README.md
173
+ - Rakefile
174
+ - bin/console
175
+ - bin/setup
176
+ - datahen.gemspec
177
+ - examples/fetchtest/libraries/hello.rb
178
+ - examples/fetchtest/libraries/hello_fail.rb
179
+ - examples/fetchtest/parsers/failed.rb
180
+ - examples/fetchtest/parsers/find_outputs.rb
181
+ - examples/fetchtest/parsers/home.rb
182
+ - examples/fetchtest/parsers/nested_fail.rb
183
+ - examples/fetchtest/parsers/simple.rb
184
+ - examples/fetchtest/seeders/csv_seeder.rb
185
+ - examples/fetchtest/seeders/failed.rb
186
+ - examples/fetchtest/seeders/list_of_urls.csv
187
+ - examples/fetchtest/seeders/seed.rb
188
+ - examples/fetchtest/seeders/test_reset_page.rb
189
+ - exe/hen
190
+ - lib/datahen.rb
191
+ - lib/datahen/cli.rb
192
+ - lib/datahen/cli/env_var.rb
193
+ - lib/datahen/cli/finisher.rb
194
+ - lib/datahen/cli/global_page.rb
195
+ - lib/datahen/cli/job.rb
196
+ - lib/datahen/cli/job_output.rb
197
+ - lib/datahen/cli/parser.rb
198
+ - lib/datahen/cli/scraper.rb
199
+ - lib/datahen/cli/scraper_deployment.rb
200
+ - lib/datahen/cli/scraper_export.rb
201
+ - lib/datahen/cli/scraper_exporter.rb
202
+ - lib/datahen/cli/scraper_finisher.rb
203
+ - lib/datahen/cli/scraper_job.rb
204
+ - lib/datahen/cli/scraper_job_var.rb
205
+ - lib/datahen/cli/scraper_page.rb
206
+ - lib/datahen/cli/scraper_var.rb
207
+ - lib/datahen/cli/seeder.rb
208
+ - lib/datahen/client.rb
209
+ - lib/datahen/client/auth_token.rb
210
+ - lib/datahen/client/backblaze_content.rb
211
+ - lib/datahen/client/base.rb
212
+ - lib/datahen/client/deploy_key.rb
213
+ - lib/datahen/client/env_var.rb
214
+ - lib/datahen/client/export.rb
215
+ - lib/datahen/client/global_page.rb
216
+ - lib/datahen/client/job.rb
217
+ - lib/datahen/client/job_export.rb
218
+ - lib/datahen/client/job_log.rb
219
+ - lib/datahen/client/job_output.rb
220
+ - lib/datahen/client/job_page.rb
221
+ - lib/datahen/client/job_stat.rb
222
+ - lib/datahen/client/scraper.rb
223
+ - lib/datahen/client/scraper_deployment.rb
224
+ - lib/datahen/client/scraper_export.rb
225
+ - lib/datahen/client/scraper_exporter.rb
226
+ - lib/datahen/client/scraper_finisher.rb
227
+ - lib/datahen/client/scraper_job.rb
228
+ - lib/datahen/client/scraper_job_output.rb
229
+ - lib/datahen/client/scraper_job_page.rb
230
+ - lib/datahen/client/scraper_job_var.rb
231
+ - lib/datahen/client/scraper_var.rb
232
+ - lib/datahen/plugin.rb
233
+ - lib/datahen/plugin/context_exposer.rb
234
+ - lib/datahen/scraper.rb
235
+ - lib/datahen/scraper/executor.rb
236
+ - lib/datahen/scraper/finisher.rb
237
+ - lib/datahen/scraper/parser.rb
238
+ - lib/datahen/scraper/ruby_finisher_executor.rb
239
+ - lib/datahen/scraper/ruby_parser_executor.rb
240
+ - lib/datahen/scraper/ruby_seeder_executor.rb
241
+ - lib/datahen/scraper/seeder.rb
242
+ - lib/datahen/version.rb
243
+ homepage: https://datahen.com
244
+ licenses:
245
+ - MIT
246
+ metadata:
247
+ allowed_push_host: https://rubygems.org
248
+ homepage_uri: https://datahen.com
249
+ source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
250
+ post_install_message:
251
+ rdoc_options: []
252
+ require_paths:
253
+ - lib
254
+ required_ruby_version: !ruby/object:Gem::Requirement
255
+ requirements:
256
+ - - ">="
257
+ - !ruby/object:Gem::Version
258
+ version: 2.2.2
259
+ required_rubygems_version: !ruby/object:Gem::Requirement
260
+ requirements:
261
+ - - ">="
262
+ - !ruby/object:Gem::Version
263
+ version: '0'
264
+ requirements: []
265
+ rubyforge_project:
266
+ rubygems_version: 2.7.6
267
+ signing_key:
268
+ specification_version: 4
269
+ summary: DataHen toolbelt for developers
270
+ test_files: []