Dynamised 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/dynamised +3 -0
- data/lib/dynamised/after_scrape_methods.rb +8 -0
- data/lib/dynamised/meta.rb +1 -1
- data/lib/dynamised/scraper.rb +8 -1
- data/lib/dynamised.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 92c09eaab0e73d11f5961faa906a83bf1029fc19
|
4
|
+
data.tar.gz: c67b20e85a6e72ede471fb285ae91bfe444c6786
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5846634eb62634d1006d7a0dd61a0afbb8da0a3abb4d960a42358d245e14dfa110805662fdd51fbd1599286e7cc8f4b8d08a9999e9af0b1b9360f2c60941b352
|
7
|
+
data.tar.gz: b7937da48069fefabd9ae5118b1e4032a3930e367e7280ea25c74e6506d69b804a13ed9cf3f11b7c4093629bbed1a7133dc4eb5cbcdca777f724be08d09a3647
|
data/bin/dynamised
CHANGED
@@ -16,7 +16,10 @@ module Dynamised
|
|
16
16
|
command :run do |c|
|
17
17
|
c.syntax = 'dynamised run <script>'
|
18
18
|
c.description = 'scrapes with given scraper'
|
19
|
+
c.option "--[no-]db", 'Turns off db, scraper will pull and store directly to end file, no resume'
|
19
20
|
c.action do |args,options|
|
21
|
+
options.default db: true
|
22
|
+
args = {nodb: options.db}
|
20
23
|
script_path = check_and_convert(args.first)
|
21
24
|
class_name = get_class_name(args.first)
|
22
25
|
create_temp_class(class_name,File.read(script_path))
|
data/lib/dynamised/after_scrape_methods.rb
CHANGED
@@ -5,6 +5,14 @@ module Dynamised
|
|
5
5
|
string.gsub(/<\/?[^>]*>/, "").strip.gsub(/ ?\\r\\n/,'')
|
6
6
|
end
|
7
7
|
|
8
|
+
def unescape_html(string,field_data)
|
9
|
+
CGI::unescapeHTML(string)
|
10
|
+
end
|
11
|
+
|
12
|
+
def escape_html(string,field_data)
|
13
|
+
CGI::escapeHTML(string)
|
14
|
+
end
|
15
|
+
|
8
16
|
def page_url(string,field_data)
|
9
17
|
@current_url
|
10
18
|
end
|
data/lib/dynamised/meta.rb
CHANGED
data/lib/dynamised/scraper.rb
CHANGED
@@ -130,7 +130,14 @@ module Dynamised
|
|
130
130
|
tree.data[:fields].each_with_object({}) do |(field,data),res_hash|
|
131
131
|
target = execute_method(data[:meta][:before],remove_style_tags(doc),res_hash)
|
132
132
|
value = scrape_tag(target,data[:xpath],data[:meta])
|
133
|
-
res_hash[field] =
|
133
|
+
res_hash[field] =
|
134
|
+
if value
|
135
|
+
[*data[:meta][:after]].each do |method|
|
136
|
+
execute_method(method,value,res_hash)
|
137
|
+
end
|
138
|
+
else
|
139
|
+
data[:meta].fetch(:default,nil)
|
140
|
+
end
|
134
141
|
end
|
135
142
|
@scraped_data[c_url] = fields.to_json if @use_store
|
136
143
|
block.call(fields)
|
data/lib/dynamised.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
%w{tty-spinner nokogiri awesome_print dbm json}.each {|lib| require lib}
|
1
|
+
%w{tty-spinner nokogiri awesome_print dbm json cgi}.each {|lib| require lib}
|
2
2
|
%w{meta after_scrape_methods before_scrape_methods curb_dsl helpers node scraper_dsl writers dbm_wrapper scraper}
|
3
3
|
.each do |f|
|
4
4
|
require_relative "dynamised/%s" % f
|