Dynamised 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 095196c60ad862112370409060962832c85306b2
4
- data.tar.gz: d5106d54047da1901bbe2ad0a0894f1245eee38b
3
+ metadata.gz: c5635a9d0b97dcc25da301b5a176de94ea7afc42
4
+ data.tar.gz: 4eb55543e0be23dd36cd840a4198ce719f85e381
5
5
  SHA512:
6
- metadata.gz: eaec3611b7acf56e4864c1c738353ac226a5c81c7927eedb491df7bdb92d5ded7e1b5dfd07284a2cd5cec556cb083af9abad22c6d7ca9c47273634a6fd143d67
7
- data.tar.gz: cef11f2de483ad33a97986c18a6b770ba30c3c0becc3beb700a0b2a2abba364e4f530f576466cd8af8de857c9a615a21fd4cfe6e3c45996fd73c161028606a40
6
+ metadata.gz: e1eea295b1d0852072bfdfa8464ad6de008288d419831b042b74df827faa5f28478725faeb2127bfbfc6bf44b6bde82ee91e36fc168677f33f4578db00edce7a
7
+ data.tar.gz: 797fbcb4b2b9ecd58531a59c11d5ca65813315c09b2e1c593f09ad6794c1ddf6482c47a9cfec148cdb58e865a4951f7615f2e9ee8cb55a224bfa2711808604c4
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
- puts [$0, __FILE__].inspect
3
2
  require_relative '../lib/dynamised'
4
3
  require 'commander'
5
4
 
@@ -21,6 +21,16 @@ module Dynamised
21
21
  @current_page.data[:fields].keys
22
22
  end
23
23
 
24
+ def pbcopy(input)
25
+ str = input.to_s
26
+ IO.popen('pbcopy', 'w') { |f| f << str }
27
+ str
28
+ end
29
+
30
+ def pbpaste
31
+ `pbpaste`
32
+ end
33
+
24
34
  end
25
35
  end
26
36
  end
@@ -1,6 +1,6 @@
1
1
  module Dynamised
2
2
  module META
3
- Version = "0.2.0"
3
+ Version = "0.2.1"
4
4
  Description = <<-DESC.gsub(/^\s*/, '')
5
5
  A tool that allows a user to build a web scraper that works by recursively crawling pages until
6
6
  it finds the requested infomation.
@@ -101,7 +101,7 @@ module Dynamised
101
101
  scrape(doc,tree,&block)
102
102
  end
103
103
  if pagination?(doc,tree)
104
- paginate(tree) do |item|
104
+ paginate(doc,tree) do |item|
105
105
  pull(item,tree,&block)
106
106
  end
107
107
  else
@@ -117,10 +117,10 @@ module Dynamised
117
117
 
118
118
  def paginate(doc,tree)
119
119
  current_page = doc
120
- max = scrape_tag(current_page,tree[:paginate][:max],{r_type: :to_i})
120
+ max = scrape_tag(current_page,tree.data[:paginate][:max],{r_type: :to_i})
121
121
  raise "No paginate max tag found" unless max
122
122
  (1..max).each do
123
- (current_page.xpath(tree[:paginate][:item])).each do |node|
123
+ (current_page.xpath(tree.data[:paginate][:item])).each do |node|
124
124
  yield(item)
125
125
  end
126
126
  current_page = get_doc(current_page.xpath(tree[:paginate][:next]).attr('href'))
@@ -128,7 +128,7 @@ module Dynamised
128
128
  end
129
129
 
130
130
  def pagination?(doc,tree)
131
- search_for_tag(doc,tree[:paginate][:if])
131
+ search_for_tag(doc,tree.data[:paginate][:if])
132
132
  end
133
133
 
134
134
 
@@ -152,12 +152,13 @@ module Dynamised
152
152
  fields =
153
153
  tree.data[:fields].each_with_object({}) do |(field,data),res_hash|
154
154
  target = execute_method(data[:meta][:before],remove_style_tags(doc),res_hash)
155
- value = scrape_tag(target,data[:xpath],data[:meta])
155
+ value = data[:xpath].empty? ? target : scrape_tag(target,data[:xpath],data[:meta])
156
156
  res_hash[field] =
157
157
  if value
158
- [*data[:meta][:after]].each do |method|
158
+ res = [*data[:meta][:after]].map do |method|
159
159
  execute_method(method,value,res_hash)
160
160
  end
161
+ res.length > 1 ? res : res.first
161
162
  else
162
163
  data[:meta].fetch(:default,nil)
163
164
  end
@@ -230,7 +231,7 @@ module Dynamised
230
231
  end
231
232
 
232
233
  def search_for_tag(doc,xpath)
233
- doc.at_xpath(XPATH_Anchor % xpath)
234
+ xpath ? doc.at_xpath(XPATH_Anchor % xpath) : false
234
235
  end
235
236
 
236
237
  def scrape_tag(doc,xpath,meta={})
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: Dynamised
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Becker
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-04-21 00:00:00.000000000 Z
11
+ date: 2017-05-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: tty-spinner