selenium_spider 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/README.md +12 -3
- data/lib/selenium_spider/command_line.rb +5 -3
- data/lib/selenium_spider/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f8e5885c23382809cfc9f1a0d450b1a5ea873cd2
|
4
|
+
data.tar.gz: 50be3827ee35809162ea8b24f44cd63543af34be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 68b59eb6225dca883a04a1691c0f271e8df7eeac45c72f333c8688afbc5616d6e6a8ca39a612dd945d7300c46f8013bc4cc97eeecbb28c5b27556fb19099d3fd
|
7
|
+
data.tar.gz: cbb8f998eb53e60337797d3978152025921c004184096e10f3ba92ee2747beec3577b79d21faed3c00e4166a2438917a0163bf7680c33f9d366aaa153a727932
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -11,7 +11,7 @@ This will have these features:
|
|
11
11
|
|
12
12
|
Based on Selenium Standalone DSL, which runs Firefox headlessly, it comprehends JavaScript completely.
|
13
13
|
|
14
|
-
###
|
14
|
+
### MPC architecture
|
15
15
|
|
16
16
|
MPC = Model Pagination Controller
|
17
17
|
|
@@ -19,11 +19,20 @@ Generally, scraping consists of two parts: Listing page and Detail page.
|
|
19
19
|
|
20
20
|
In MPC architecture, Model is for extracting information from the detail page and storing data to the database.
|
21
21
|
|
22
|
-
|
22
|
+
Pagination is for listing items and pagination.
|
23
23
|
|
24
24
|
Controller is for handling the above two.
|
25
25
|
|
26
|
-
###
|
26
|
+
### Code generator
|
27
|
+
|
28
|
+
```sh
|
29
|
+
selenium-spider generate --site yahoo
|
30
|
+
#=> create app/models/yahoo.rb
|
31
|
+
#=> create app/paginations/yahoo_pagination.rb
|
32
|
+
#=> create app/controllers/yahoo_controller.rb
|
33
|
+
```
|
34
|
+
|
35
|
+
### Web-based task execution (Coming)
|
27
36
|
|
28
37
|
Scraping tasks are often numerous and difficult to arrange.
|
29
38
|
|
data/lib/selenium_spider/command_line.rb
CHANGED
@@ -7,6 +7,8 @@ require 'tilt'
|
|
7
7
|
$LOAD_PATH.unshift File.expand_path('../../../app', __FILE__)
|
8
8
|
$LOAD_PATH.unshift File.expand_path('../../../examples', __FILE__)
|
9
9
|
|
10
|
+
$LOAD_PATH.unshift File.expand_path('../app', ENV['BUNDLE_GEMFILE'])
|
11
|
+
|
10
12
|
module SeleniumSpider
|
11
13
|
class CommandLine
|
12
14
|
def self.execute(options)
|
@@ -46,8 +48,8 @@ module SeleniumSpider
|
|
46
48
|
mkdir_if_not_exist './app/paginations/'
|
47
49
|
mkdir_if_not_exist './app/controllers/'
|
48
50
|
|
49
|
-
gem_root = File.expand_path('../
|
50
|
-
generation_path = "#{gem_root}/
|
51
|
+
gem_root = File.expand_path('../', __FILE__)
|
52
|
+
generation_path = "#{gem_root}/generations"
|
51
53
|
|
52
54
|
generate_class "#{generation_path}/model.rb.erb",
|
53
55
|
"./app/models/#{@options[:site]}.rb"
|
@@ -75,7 +77,7 @@ module SeleniumSpider
|
|
75
77
|
return if File.exist? path
|
76
78
|
|
77
79
|
require 'fileutils'
|
78
|
-
FileUtils.mkdir_p
|
80
|
+
FileUtils.mkdir_p path
|
79
81
|
end
|
80
82
|
end
|
81
83
|
end
|