digger 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/digger/index.rb +40 -0
- data/lib/digger/model.rb +23 -5
- data/lib/digger/version.rb +1 -1
- metadata +3 -3
- data/lib/digger/batch.rb +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 204be25e4d5bb59529fc7f5a392b86bf6f7ed631
|
4
|
+
data.tar.gz: 21119d3c972aa60d2f6c95dddfd66c4b1ab4cfa0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3f1dcd8583e6038c4237308b8a7488336f8d3ecb3b8a87c745622ee95719f964e832fe6cfba5a515c1cfad61ae02382875e3c93b19156e2e391b8eee15832c9a
|
7
|
+
data.tar.gz: ab18eff67be45f8d9ed3b43fcefe14a92a8f7089cd30c366b6e1f3ab8c1cd78f7727b63bfa318ea828e2ae0d9a8386336e761bb3417815cf24492f8609d2e328
|
data/lib/digger/index.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
module Digger
  # A set of index-page URLs generated from a pattern.
  #
  # pattern - String URL template; every `*` is one placeholder.
  # args    - Array holding one entry per placeholder. An entry that responds
  #           to `each` (Array, Range, ...) contributes all of its elements;
  #           any other entry contributes only itself. The URLs are built from
  #           every combination (cartesian product) of those values.
  class Index < Struct.new(:pattern, :args)
    # Raised by Index.batch (and therefore by #process) when called without a block.
    class NoBlockError < ArgumentError; end

    # Runs the block once per generated URL.
    #
    # cocurrence - maximum number of URLs handled in parallel (1 = sequential).
    #
    # Returns the block results, in the same order as #urls.
    # Raises NoBlockError when no block is given.
    def process(cocurrence = 1, &block)
      # The block has to be forwarded with `&`; handing it over as a third
      # positional argument makes Index.batch raise ArgumentError.
      Index.batch(urls, cocurrence, &block)
    end

    # Returns the memoized Array of URLs obtained by filling the pattern with
    # every combination of the argument values.
    def urls
      @urls ||= begin
        lists = (args || []).map { |a| a.respond_to?(:each) ? a.to_a : [a] }
        if lists.empty?
          # Nothing to substitute: the pattern itself is the only URL.
          [pattern]
        else
          first, *rest = lists
          first.product(*rest).map { |combo| pattern_applied_url(combo) }
        end
      end
    end

    # Replaces the n-th `*` of the pattern with the n-th element of +arg+.
    # Placeholders without a matching element are replaced by an empty string.
    def pattern_applied_url(arg)
      position = -1
      pattern.gsub('*') { arg[position += 1].to_s }
    end

    # Calls the block for every entity and collects the results.
    #
    # entities   - Enumerable of values to hand to the block.
    # cocurrence - when greater than 1, entities are processed in slices of
    #              this size, one thread per entity, and each slice is
    #              finished before the next one starts.
    #
    # Returns an Array of block results in the order of +entities+.
    # Raises NoBlockError when no block is given.
    def self.batch(entities, cocurrence = 1, &block)
      raise NoBlockError, "No block given" unless block

      return entities.map { |ent| block.call(ent) } unless cocurrence > 1

      entities.each_slice(cocurrence).flat_map do |group|
        workers = group.map do |entity|
          Thread.new(entity) { |ent| block.call(ent) }
        end
        # Thread#value joins the thread and hands back its result, so no
        # shared Hash is mutated from several threads and duplicate entities
        # keep their own results.
        workers.map(&:value)
      end
    end
  end
end
|
data/lib/digger/model.rb
CHANGED
@@ -2,11 +2,12 @@ require 'digger/pattern'
|
|
2
2
|
|
3
3
|
module Digger
|
4
4
|
class Model
|
5
|
-
@@
|
5
|
+
@@digger_config = {'pattern'=>{}, 'index'=>{}}
|
6
6
|
|
7
7
|
class << self
|
8
|
+
# patterns
|
8
9
|
# Returns the pattern settings Hash registered under this class's name,
# creating an empty one on first access.
def pattern_config
  per_class = @@digger_config['pattern']
  per_class[name] ||= {}
end
|
11
12
|
|
12
13
|
Pattern::TYPES.each do |method|
|
@@ -17,10 +18,17 @@ module Digger
|
|
17
18
|
}
|
18
19
|
end
|
19
20
|
|
20
|
-
|
21
|
+
# index page
|
22
|
+
# Returns whatever is stored for this class's name in the 'index' section
# of the shared configuration, or nil when nothing is stored.
def index_config
  registry = @@digger_config['index']
  registry[name]
end
|
22
25
|
|
23
|
-
def
|
26
|
+
# Registers an index page for this class: builds an Index from the pattern
# and the remaining arguments and stores it under the class's name.
#
# Returns the stored Index.
def index_page(pattern, *args)
  entry = Index.new(pattern, args)
  @@digger_config['index'][name] = entry
end
|
29
|
+
|
30
|
+
# True when an index configuration is stored for this class.
def index_page?
  stored = index_config
  !stored.nil?
end
|
25
33
|
end
|
26
34
|
|
@@ -32,10 +40,20 @@ module Digger
|
|
32
40
|
result
|
33
41
|
end
|
34
42
|
|
35
|
-
def
|
43
|
+
# Fetches the page at +url+ with a fresh Digger::HTTP client and returns
# the result of matching it with #match_page.
def dig_url(url)
  fetched = Digger::HTTP.new.fetch_page(url)
  match_page(fetched)
end
|
48
|
+
|
49
|
+
# Digs a list of pages and returns one #dig_url result per URL.
#
# urls       - a URL or a collection of URLs. nil and a single URL String
#              are accepted too (coerced with Array()). When empty, the
#              class's registered index page supplies the URLs.
# cocurrence - forwarded to Index.batch / Index#process as the number of
#              URLs handled in parallel.
#
# Returns the collected results, or nil when no URLs are given and the class
# has no index page configured.
def dig(urls = [], cocurrence = 1)
  targets = Array(urls)
  unless targets.empty?
    return Index.batch(targets, cocurrence) { |url| dig_url(url) }
  end

  return unless self.class.index_page?

  self.class.index_config.process(cocurrence) { |url| dig_url(url) }
end
|
40
58
|
end
|
41
59
|
end
|
data/lib/digger/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- binz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,8 +80,8 @@ files:
|
|
80
80
|
- Rakefile
|
81
81
|
- digger.gemspec
|
82
82
|
- lib/digger.rb
|
83
|
-
- lib/digger/batch.rb
|
84
83
|
- lib/digger/http.rb
|
84
|
+
- lib/digger/index.rb
|
85
85
|
- lib/digger/model.rb
|
86
86
|
- lib/digger/page.rb
|
87
87
|
- lib/digger/pattern.rb
|
data/lib/digger/batch.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
module Digger
|
2
|
-
module Batch
|
3
|
-
class NoBlockError < ArgumentError; end
|
4
|
-
|
5
|
-
def self.do(cocurrence, entities, &block)
|
6
|
-
raise NoBlockError, "No block given" unless block
|
7
|
-
entities.each_slice(cocurrence) do |group|
|
8
|
-
threads = []
|
9
|
-
group.each do |entity|
|
10
|
-
threads << Thread.new(entity) do |ent|
|
11
|
-
block.call(ent)
|
12
|
-
end
|
13
|
-
end
|
14
|
-
threads.each{|thread| thread.join}
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|