apify_core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,68 @@
1
module Apify
  module Core
    # Extracts data from an HTML document by walking a declarative pattern.
    #
    # A pattern is a Hash whose values may be:
    # * a String containing one or more `<% selector | filter | ... %>` tags,
    # * an Array of nested pattern values,
    # * a Hash of nested pattern values; when that Hash has an `__iterator__`
    #   key, its value is used as a selector and the remaining keys are
    #   evaluated once per matched node, producing an Array of Hashes.
    #
    # Values of any other type evaluate to nil.
    class Parser
      # Pattern key holding the selector to iterate over.
      ITERATOR_KEY = '__iterator__'.freeze
      # Matches a single `<% ... %>` tag and captures its body.
      TAG = /<%\s?+(.*?)\s?+%>/
      # Filter names handed to Filter.apply when a tag names none.
      DEFAULT_FILTERS = %w[first text strip].freeze
      private_constant :ITERATOR_KEY, :TAG, :DEFAULT_FILTERS

      # @param html [String] markup handed to Nokogiri::HTML
      # @param pattern [Hash] extraction pattern (see class documentation)
      def initialize(html = "", pattern = {})
        @html = html
        @pattern = pattern
        @doc = ::Nokogiri::HTML(html)
        @result = {}
      end

      # Evaluates every top-level pattern entry against the parsed document.
      # A top-level `__iterator__` key is skipped.
      #
      # @return [Hash] pattern keys mapped to their extracted values
      def perform
        @pattern.each do |key, value|
          next if key == ITERATOR_KEY

          @result[key] = process(value, @doc)
        end
        @result
      end

      # Parses each document and collects everything matching +expression+.
      #
      # @param expression [String] selector passed to `search`
      # @param docs [Enumerable<String>] HTML sources
      # @return [Array] flattened matches from all documents
      def self.fetch(expression, docs)
        docs.map { |html| ::Nokogiri::HTML(html).search(expression) }.flatten
      end

      private

      # Dispatches on the pattern value's type; unsupported types yield nil.
      def process(value, context)
        case value
        when Hash   then process_hash(value, context)
        when Array  then value.map { |item| process(item, context) }
        when String then process_string(value, context)
        end
      end

      # Evaluates a Hash pattern, once per matched node when it declares an
      # iterator selector, otherwise once against the current context.
      def process_hash(fields, context)
        return process_fields(fields, context) unless fields.key?(ITERATOR_KEY)

        nodes = context.search(fields[ITERATOR_KEY].strip)
        nodes.map { |node| process_fields(fields, node) }
      end

      # Evaluates every non-iterator entry of +fields+ against +context+.
      def process_fields(fields, context)
        fields.each_with_object({}) do |(key, field), extracted|
          next if key == ITERATOR_KEY

          extracted[key] = process(field, context)
        end
      end

      # Evaluates a String pattern.
      #
      # * No tag: the string is returned unchanged.
      # * The whole string is exactly one tag: the filter result is returned
      #   as-is (it is not forced to a String).
      # * Otherwise every tag is replaced in place by its result. This also
      #   covers a single tag surrounded by literal text, which previously
      #   had its selector mangled by slicing the first and last two
      #   characters off the whole string.
      def process_string(value, context)
        tag = TAG.match(value)
        return value unless tag

        if tag.pre_match.empty? && tag.post_match.empty?
          evaluate(tag[1], context)
        else
          value.gsub(TAG) { evaluate(Regexp.last_match(1), context) }
        end
      end

      # Runs one tag body (`selector | filter | filter`) against +context+.
      # Returns nil when the selector part is empty instead of raising.
      def evaluate(tag_body, context)
        expression, *filters = tag_body.split('|').map(&:strip)
        return nil if expression.nil? || expression.empty?

        # A fresh copy is passed in case Filter.apply mutates its argument.
        filters = DEFAULT_FILTERS.dup if filters.empty?

        result = context.search(expression)
        result ? Filter.apply(result, filters) : nil
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Apify
  module Core
    # Version of the apify_core gem. Frozen so the shared constant cannot be
    # mutated in place by callers.
    VERSION = "0.1.0".freeze
  end
end
data/lib/apify_core.rb ADDED
@@ -0,0 +1,19 @@
1
+ require "apify_core/version"
2
+ require "apify_core/parser"
3
+ require "apify_core/filter"
4
+ require "apify_core/fetcher"
5
+
6
module Apify
  module Core
    # Builds a Fetcher for +pages+, runs its prepare and perform steps in
    # that order, and returns it.
    #
    # @param pages [#with_indifferent_access] page definitions; converted
    #   with `with_indifferent_access` before being handed to the Fetcher
    # @param processes [Integer] passed through to Fetcher.new unchanged
    # @param delay [Numeric] passed through to Fetcher.new unchanged
    # @return [Fetcher] the fetcher after `prepare` and `perform` have run
    def self.crawl!(pages, processes = 2, delay = 0)
      fetcher = Fetcher.new(pages.with_indifferent_access, processes, delay)
      fetcher.prepare
      fetcher.perform
      fetcher
    end

    # @return [String] absolute path two levels above this file
    def self.root
      File.expand_path('../..', __FILE__)
    end
  end
end