Dynamised 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/dynamised +3 -0
- data/lib/dynamised/after_scrape_methods.rb +8 -0
- data/lib/dynamised/meta.rb +1 -1
- data/lib/dynamised/scraper.rb +8 -1
- data/lib/dynamised.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 92c09eaab0e73d11f5961faa906a83bf1029fc19
|
4
|
+
data.tar.gz: c67b20e85a6e72ede471fb285ae91bfe444c6786
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5846634eb62634d1006d7a0dd61a0afbb8da0a3abb4d960a42358d245e14dfa110805662fdd51fbd1599286e7cc8f4b8d08a9999e9af0b1b9360f2c60941b352
|
7
|
+
data.tar.gz: b7937da48069fefabd9ae5118b1e4032a3930e367e7280ea25c74e6506d69b804a13ed9cf3f11b7c4093629bbed1a7133dc4eb5cbcdca777f724be08d09a3647
|
data/bin/dynamised
CHANGED
@@ -16,7 +16,10 @@ module Dynamised
|
|
16
16
|
command :run do |c|
|
17
17
|
c.syntax = 'dynamised run <script>'
|
18
18
|
c.description = 'scrapes with given scraper'
|
19
|
+
c.option "--[no-]db", 'Turns off db, scraper will pull and store directly to end file, no resume'
|
19
20
|
c.action do |args,options|
|
21
|
+
options.default db: true
|
22
|
+
args = {nodb: options.db}
|
20
23
|
script_path = check_and_convert(args.first)
|
21
24
|
class_name = get_class_name(args.first)
|
22
25
|
create_temp_class(class_name,File.read(script_path))
|
@@ -5,6 +5,14 @@ module Dynamised
|
|
5
5
|
string.gsub(/<\/?[^>]*>/, "").strip.gsub(/ ?\\r\\n/,'')
|
6
6
|
end
|
7
7
|
|
8
|
+
def unescape_html(string,field_data)
|
9
|
+
CGI::unescapeHTML(string)
|
10
|
+
end
|
11
|
+
|
12
|
+
def escape_html(string,field_data)
|
13
|
+
CGI::escapeHTML(string)
|
14
|
+
end
|
15
|
+
|
8
16
|
def page_url(string,field_data)
|
9
17
|
@current_url
|
10
18
|
end
|
data/lib/dynamised/meta.rb
CHANGED
data/lib/dynamised/scraper.rb
CHANGED
@@ -130,7 +130,14 @@ module Dynamised
|
|
130
130
|
tree.data[:fields].each_with_object({}) do |(field,data),res_hash|
|
131
131
|
target = execute_method(data[:meta][:before],remove_style_tags(doc),res_hash)
|
132
132
|
value = scrape_tag(target,data[:xpath],data[:meta])
|
133
|
-
res_hash[field] =
|
133
|
+
res_hash[field] =
|
134
|
+
if value
|
135
|
+
[*data[:meta][:after]].each do |method|
|
136
|
+
execute_method(method,value,res_hash)
|
137
|
+
end
|
138
|
+
else
|
139
|
+
data[:meta].fetch(:default,nil)
|
140
|
+
end
|
134
141
|
end
|
135
142
|
@scraped_data[c_url] = fields.to_json if @use_store
|
136
143
|
block.call(fields)
|
data/lib/dynamised.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
%w{tty-spinner nokogiri awesome_print dbm json}.each {|lib| require lib}
|
1
|
+
%w{tty-spinner nokogiri awesome_print dbm json cgi}.each {|lib| require lib}
|
2
2
|
%w{meta after_scrape_methods before_scrape_methods curb_dsl helpers node scraper_dsl writers dbm_wrapper scraper}
|
3
3
|
.each do |f|
|
4
4
|
require_relative "dynamised/%s" % f
|