apify_core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 39b891973e8c30065d8136d0b61f4fea6c4c8328
4
+ data.tar.gz: e2ce4293075a8bb218660a0e0546e86e51559b90
5
+ SHA512:
6
+ metadata.gz: fdd15b69af5f5068a7f2fadc8afd2f746c074c757fb979569a65eeff525cca225472f21814b1869a20c147f5df3935ad50d40ab61f6a14d82009f50009077c9b
7
+ data.tar.gz: 57899e4ff289ead9ced7b376b345d4fd9f0b4b1f9b0a53221237724e2510c96bf225e6e44e1694317396bcee20caec8773c5e0fe7b044c02ddfb496e02635fb0
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in apify.gemspec
4
+ gemspec
5
+
6
+ gem 'activesupport', require: 'active_support/all'
7
+ gem 'watir-webdriver', '~> 0.6.11'
8
+ gem 'rest_client', '~> 1.8.2'
9
+ gem 'headless', '~> 1.0.2'
10
+ gem 'parallel', '~> 1.3.3'
11
+ gem 'nokogiri', '~> 1.6.5'
12
+
13
+ group :development do
14
+ gem 'awesome_print', '~> 1.2.0'
15
+ gem 'pry'
16
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,79 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ apify_core (0.0.1)
5
+ activesupport
6
+ headless
7
+ json
8
+ nokogiri (~> 1.6.5)
9
+ parallel
10
+ rest_client
11
+ watir-webdriver
12
+
13
+ GEM
14
+ remote: https://rubygems.org/
15
+ specs:
16
+ activesupport (4.2.0)
17
+ i18n (~> 0.7)
18
+ json (~> 1.7, >= 1.7.7)
19
+ minitest (~> 5.1)
20
+ thread_safe (~> 0.3, >= 0.3.4)
21
+ tzinfo (~> 1.1)
22
+ awesome_print (1.2.0)
23
+ childprocess (0.5.5)
24
+ ffi (~> 1.0, >= 1.0.11)
25
+ diff-lcs (1.2.5)
26
+ ffi (1.9.6)
27
+ headless (1.0.2)
28
+ i18n (0.7.0)
29
+ json (1.8.1)
30
+ mini_portile (0.6.2)
31
+ minitest (5.5.0)
32
+ multi_json (1.10.1)
33
+ netrc (0.7.9)
34
+ nokogiri (1.6.5)
35
+ mini_portile (~> 0.6.0)
36
+ parallel (1.3.3)
37
+ rake (10.4.2)
38
+ rest_client (1.8.2)
39
+ netrc (~> 0.7.7)
40
+ rspec (3.0.0)
41
+ rspec-core (~> 3.0.0)
42
+ rspec-expectations (~> 3.0.0)
43
+ rspec-mocks (~> 3.0.0)
44
+ rspec-core (3.0.4)
45
+ rspec-support (~> 3.0.0)
46
+ rspec-expectations (3.0.4)
47
+ diff-lcs (>= 1.2.0, < 2.0)
48
+ rspec-support (~> 3.0.0)
49
+ rspec-mocks (3.0.4)
50
+ rspec-support (~> 3.0.0)
51
+ rspec-support (3.0.4)
52
+ rubyzip (1.1.6)
53
+ selenium-webdriver (2.44.0)
54
+ childprocess (~> 0.5)
55
+ multi_json (~> 1.0)
56
+ rubyzip (~> 1.0)
57
+ websocket (~> 1.0)
58
+ thread_safe (0.3.4)
59
+ tzinfo (1.2.2)
60
+ thread_safe (~> 0.1)
61
+ watir-webdriver (0.6.11)
62
+ selenium-webdriver (>= 2.18.0)
63
+ websocket (1.2.1)
64
+
65
+ PLATFORMS
66
+ ruby
67
+
68
+ DEPENDENCIES
69
+ activesupport
70
+ apify_core!
71
+ awesome_print (~> 1.2.0)
72
+ bundler (~> 1.7)
73
+ headless (~> 1.0.2)
74
+ nokogiri (~> 1.6.5)
75
+ parallel (~> 1.3.3)
76
+ rake (~> 10.0)
77
+ rest_client (~> 1.8.2)
78
+ rspec (~> 3.0.0)
79
+ watir-webdriver (~> 0.6.11)
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 victorvsk
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # Apify Core
2
+
3
+ Apify Core is a part of the Apify Project. Parse HTML/XML to JSON with an easy API and useful filters.
4
+ The Apify Project allows even more: parsing an entire website with ease.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ ```ruby
11
+ gem 'apify_core'
12
+ ```
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install apify_core
21
+
22
+ ## Usage
23
+
24
+ ```
25
+ html = RestClient.get('http://github.com')
26
+ pattern = { title: '<% title %>' }
27
+ title = Apify::Core.new(html, pattern).perform # GitHub · Build software better, together.
28
+ ```
29
+
30
+ ```
31
+ request = { github: { url: ['http://github.com'], js: false, host: 'http://github.com', pattern: { title: '<% title %>' } } }
32
+ response = Apify.crawl!(request) # { "github": { "title": "GitHub · Build software better, together." } }
33
+ ```
34
+
35
+ See the documentation (TODO) for more details. Some syntax examples can also be found in spec/examples.
36
+
37
+ ## Contributing
38
+
39
+ 1. Fork it ( https://github.com/victorvsk/apify-core/fork )
40
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
41
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
42
+ 4. Push to the branch (`git push origin my-new-feature`)
43
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
# Point Bundler at the Gemfile that sits next to this Rakefile unless the
# caller already chose one. File.expand_path resolves its first argument
# against the second as if the second were a directory, so '..' is needed to
# step from the Rakefile's own path up to the directory that contains it.
ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __FILE__)
require 'rubygems'
# Provides the standard gem tasks (build, install, release).
require "bundler/gem_tasks"
@@ -0,0 +1,34 @@
1
# coding: utf-8
# Make lib/ loadable so the version constant can be read while building.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'apify_core/version'

Gem::Specification.new do |spec|
  spec.name          = "apify_core"
  spec.version       = Apify::Core::VERSION
  spec.authors       = ["victorvsk"]
  spec.email         = ["victor@vyskrebentsev.ru"]
  spec.summary       = %q{Core part of Apify project. An easy way to parse HTML\XML content and crawl websites in a normalized and centralized way.}
  spec.description   = %q{Simple API to transform from simple HTML to JSON to entire website to JSON.}
  spec.homepage      = "https://github.com/victorvsk/apify-core"
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0")
  # bin/ only holds Bundler-generated binstubs (bundler, rake, rspec, pry,
  # nokogiri, ...). Publishing those as this gem's executables would shadow
  # the real commands on install, so executables are taken from exe/ instead.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Development-only tooling.
  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0.0"

  # Runtime dependencies.
  spec.add_dependency 'watir-webdriver'
  spec.add_dependency 'rest_client'
  spec.add_dependency 'headless'
  spec.add_dependency 'parallel'
  spec.add_dependency 'nokogiri', '~> 1.6.5'
  spec.add_dependency 'json'
  spec.add_dependency 'activesupport'
end
data/bin/bundler ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'bundler' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('bundler', 'bundler')
data/bin/coderay ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'coderay' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('coderay', 'coderay')
data/bin/htmldiff ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'htmldiff' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('diff-lcs', 'htmldiff')
data/bin/ldiff ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'ldiff' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('diff-lcs', 'ldiff')
data/bin/nokogiri ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'nokogiri' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('nokogiri', 'nokogiri')
data/bin/pry ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'pry' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('pry', 'pry')
data/bin/rackup ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'rackup' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('rack', 'rackup')
data/bin/rake ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'rake' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('rake', 'rake')
data/bin/rspec ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'rspec' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('rspec-core', 'rspec')
data/bin/server ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'server' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('apify', 'server')
data/bin/tilt ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'tilt' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('tilt', 'tilt')
@@ -0,0 +1,190 @@
1
require 'uri'

module Apify
  module Core
    # Downloads the pages described by a crawl request and runs every page
    # that has a pattern through Parser.
    #
    # Call #prepare before #perform: #prepare is what fills #sources and the
    # per-source readers that #perform relies on.
    #
    # +pages+ maps a source name to an options Hash. Keys read by this class:
    #   :url      - list of URLs (a single URL is accepted as well)
    #   :host     - prefix put in front of URLs that carry no host
    #   :js       - truthy to render the page in a headless browser
    #   :pattern  - handed to Parser together with each downloaded page
    #   :paginate - [to_replace, pagination] pair, see Fetcher.paginate
    #   :from     - expression with select('...'), from('...') and an optional
    #               filter('a | b') clause; the URLs of this source are
    #               extracted from the pages of the source named in from(...)
    class Fetcher
      # Matches the quoted argument of a select(...)/from(...)/filter(...) clause.
      PARENTHESIS_ARGS = /\([\"\'](.*?)[\"\']\)/
      # Matches the "<% first,last,step %>" placeholder of a pagination template.
      PAGER_TAG = /<%\s?+(\d+,\d+,\d+)\s?+%>/

      attr_accessor :sources
      attr_accessor :result

      # pages     - Hash of source name => options (see class comment).
      # processes - number of worker processes for plain HTTP downloads.
      # delay     - seconds to wait before each download (0 disables waiting).
      def initialize(pages, processes = 2, delay = 0)
        @pages = pages
        @processes = processes
        @delay = delay
      end

      # Normalises every source into an instance variable named after it and
      # exposes it through a reader of the same name.
      #
      # Sources with :from are stored untouched; their URLs are only known
      # once the source they depend on has been downloaded in #perform.
      # Every other source becomes { pages:, js:, pattern:, host: }.
      def prepare
        @pages.each do |key, value|
          # Array() accepts a single URL; nil is kept so :from sources pass through.
          value[:url] = Filter.apply(value[:url] && Array(value[:url]), ['map_urlencode'])
          # Readers live on this instance only, so they do not leak to other fetchers.
          singleton_class.send(:attr_reader, key.to_sym)

          if value[:from]
            instance_variable_set("@#{key}".to_sym, value)
            next
          end

          url = self.class.base_url_for(value[:url], value[:host])
          pages = if value[:paginate]
                    self.class.paginate(url: url, to_replace: value[:paginate][0], pagination: value[:paginate][1])
                  else
                    [url.respond_to?(:each) ? url : url.to_s]
                  end

          prepared = { pages: pages, js: value[:js] || false, pattern: value[:pattern], host: value[:host] }
          instance_variable_set("@#{key}".to_sym, prepared)
        end
        self.sources = @pages.keys
      end

      # Downloads every source in order, then parses the sources that have a
      # pattern. Returns (and stores in #result) a Hash of source name =>
      # Array of parsed pages; sources without a pattern are left out of it.
      def perform
        sources.each do |source|
          resolve_linked_pages(source) if @pages[source].key?(:from)
          download_pages(send(source))
        end

        @result = @json = parse_pages
      end

      # JSON form of the result; runs #perform first when it has not run yet.
      # Arguments are forwarded so the object also works inside JSON.generate.
      def to_json(*args)
        (@json || perform).to_json(*args)
      end

      class << self
        # Expands a pagination template into one URL per page number.
        #
        #   Fetcher.paginate(url: 'http://site.com', to_replace: '(\/?)\Z', pagination: '?page=<% 1,5,1 %>')
        #   # => ["http://site.com?page=1", ..., "http://site.com?page=5"]
        #
        # opts[:url]        - URL or list of URLs (required).
        # opts[:pagination] - template holding one "<% first,last,step %>" tag.
        # opts[:to_replace] - regexp source; the part of each URL it matches is
        #                     replaced by the expanded template.
        #
        # Returns [opts[:url]] untouched when the template holds no tag.
        # Raises ArgumentError when the URL is missing or the template holds
        # more than one tag.
        def paginate(opts = {})
          pagination = opts[:pagination] || '?page=<% 1,5,1 %>'
          to_replace = opts[:to_replace] || '(\/?)\Z'
          url_or_array_of_urls = opts[:url]
          raise ArgumentError, "URL parameter missing" if url_or_array_of_urls.nil?

          pattern = pagination.scan(PAGER_TAG)
          return [url_or_array_of_urls] if pattern.empty?
          raise ArgumentError, "Only one pagination pattern allowed." if pattern.count > 1

          first_page, last_page, step = pattern.first.first.split(',').map { |n| n.strip.to_i }
          replaced = Regexp.new(to_replace)
          urls = url_or_array_of_urls.respond_to?(:each) ? url_or_array_of_urls : [url_or_array_of_urls]

          result = []
          (first_page..last_page).step(step).each do |page|
            to_append = pagination.gsub(PAGER_TAG, page.to_s)
            urls.each { |url| result << url.to_s.chomp('/').gsub(replaced, to_append) }
          end
          result
        end

        # Fetches one page and returns its HTML, or nil when the request
        # fails with one of the rescued HTTP/URI errors.
        #
        # method - :normal for a plain HTTP GET, :js to render the page in a
        #          headless browser; any other value yields nil.
        # delay  - seconds to sleep before the request.
        def download(url, method = :normal, delay = 0)
          pause = delay.to_f
          sleep pause if pause > 0

          case method
          when :js then render_in_browser(url)
          when :normal then fetch_over_http(url)
          end
        end

        # Makes host-less URLs absolute by prefixing +base_url+.
        #
        # A list yields a list of Strings. A single URL yields the parsed URI
        # when it already has a host, otherwise the prefixed String.
        # Raises ArgumentError when neither the URL nor +base_url+ gives a host.
        def base_url_for(url_or_array, base_url)
          if url_or_array.respond_to?(:each)
            result = []
            url_or_array.each { |url| result << absolute_url(url, base_url).to_s }
            result
          else
            absolute_url(url_or_array, base_url)
          end
        end

        private

        # Resolves one URL against +base_url+ (see base_url_for).
        def absolute_url(url, base_url)
          uri = URI(url)
          raise ArgumentError, "No host provided." if uri.host.nil? && base_url.nil?
          uri.host ? uri : "#{base_url}#{uri}"
        end

        # Loads +url+ in a browser running on a virtual display and returns
        # the rendered HTML. Browser and display are always released.
        def render_in_browser(url)
          headless = Headless.new
          headless.start
          browser = Watir::Browser.new
          browser.goto url
          html = browser.html
          print "+"
          html
        ensure
          browser.close if browser
          headless.destroy if headless
        end

        # Plain HTTP GET with browser-like headers. The body is converted to
        # UTF-8 when the page declares another charset in its content-type
        # meta tag. Prints "+" on success and "-" on a rescued failure.
        def fetch_over_http(url)
          html = RestClient.get(url,
            'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language' => 'ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4',
            'Connection' => 'keep-alive',
            'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/39.0.2171.65 Chrome/39.0.2171.65 Safari/537.36')
          html = to_utf8(html, declared_charset(html))
          print "+"
          html
        rescue RestClient::RequestTimeout, RestClient::ResourceNotFound, RestClient::InternalServerError,
               URI::InvalidURIError, RestClient::Forbidden, RestClient::BadGateway
          print "-"
          nil
        end

        # Lower-cased charset from the last content-type meta tag, or nil.
        def declared_charset(html)
          meta = Nokogiri::HTML(html).search('meta[@http-equiv="content-type"]').last
          found = meta && meta['content'].to_s.match(/charset=(.+)/)
          found && found[1].downcase
        end

        # Re-encodes +html+ from +charset+ to UTF-8. A charset label Ruby does
        # not know leaves the body as downloaded instead of aborting the crawl.
        def to_utf8(html, charset)
          return html if charset.to_s.strip.empty? || charset == 'utf-8'
          html.force_encoding(charset).encode("utf-8", undef: :replace)
        rescue ArgumentError
          html
        end
      end

      private

      # Fills @pages[source][:pages] with URLs extracted from the pages of
      # the source named in the :from expression. That source must appear
      # earlier in #sources so its pages are already downloaded.
      def resolve_linked_pages(source)
        expression = @pages[source][:from]
        select = expression.match(/select#{PARENTHESIS_ARGS}/)[1]
        from = expression.match(/from#{PARENTHESIS_ARGS}/)[1]
        filter_clause = expression.match(/filter#{PARENTHESIS_ARGS}/)
        filters = filter_clause ? filter_clause[1].split('|').map(&:strip) : nil
        # Without an explicit filter the selected nodes are treated as links.
        filters = ['mapattr_href', 'map_urlencode'] unless filters.present?

        urls = Filter.apply(Parser.fetch(select, send(from)[:pages]), filters)
        @pages[source][:pages] = self.class.base_url_for(urls, @pages[source][:host])
      end

      # Replaces src[:pages] (URLs or lists of URLs) with the downloaded HTML.
      # Browser rendering is limited to a single worker process.
      def download_pages(src)
        method = src[:js] ? :js : :normal
        workers = method == :js ? 1 : @processes
        delay = @delay

        src[:pages] = ::Parallel.map(src[:pages], in_processes: workers) do |url_or_array|
          if url_or_array.respond_to?(:each)
            url_or_array.map { |url| self.class.download(url, method, delay) }
          else
            self.class.download(url_or_array, method, delay)
          end
        end.flatten
      end

      # Parses the downloaded pages of every source. Each source's instance
      # variable is replaced by its parsed pages (or by the raw pages when it
      # has no pattern); only sources with a pattern enter the returned Hash.
      def parse_pages
        sources.each_with_object({}) do |source, parsed|
          src = send(source)
          pattern = src[:pattern]
          elem = pattern ? src[:pages].map { |html| Parser.new(html, pattern).perform } : src[:pages]

          parsed[source] = elem if pattern
          instance_variable_set("@#{source}".to_sym, elem)
        end
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
require 'uri'

module Apify
  module Core
    # Named value filters that can be chained with Filter.apply.
    #
    # Besides the filters defined below, two dynamic families exist:
    #   attr_<name>    - node[<name>]
    #   mapattr_<name> - node[<name>] for every node of a collection
    module Filter
      MAP_ATTR_PREFIX = 'mapattr_'.freeze
      ATTR_PREFIX = 'attr_'.freeze
      # Methods of this module that are plumbing and not usable as filters.
      NON_FILTERS = %w[apply filter? method_missing respond_to_missing?].freeze
      # URI.encode is gone in Ruby 3; the RFC 2396 parser keeps the same escaping.
      URL_ESCAPER = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER

      class << self
        # Runs +node_or_str+ through +filters+ from left to right and returns
        # the final value. Stops and returns nil as soon as a value is nil.
        #
        # filters - list of filter names (a single name is accepted too);
        #           the list itself is never modified.
        #
        # Raises NoMethodError for a name that is not a filter of this
        # module, so a name taken from a request can never reach methods
        # such as Kernel#system.
        def apply(node_or_str, filters = [])
          Array(filters).reduce(node_or_str) do |value, name|
            break if value.nil?
            raise NoMethodError, "undefined filter `#{name}' for #{self}" unless filter?(name)
            send(name, value)
          end
        end

        private

        # True when +name+ is a dynamic attr_/mapattr_ filter or a method
        # defined directly on this module (inherited methods do not count).
        def filter?(name)
          name = name.to_s
          return true if name.start_with?(MAP_ATTR_PREFIX, ATTR_PREFIX)
          return false if NON_FILTERS.include?(name)

          own = singleton_class.private_instance_methods(false) + singleton_class.public_instance_methods(false)
          own.map(&:to_s).include?(name)
        end

        # First element of a collection.
        def first(node)
          node.first
        end

        # Text content of a node.
        def text(node)
          node.text
        end

        # String without surrounding whitespace.
        def strip(str)
          str.strip if str
        end

        # Identity filter: returns the collection unchanged.
        def list(node)
          node
        end

        # Node serialised through to_s.
        def html(node)
          node.to_s
        end

        # Inner HTML of a node as a String.
        def inner_html(node)
          node.inner_html.to_s
        end

        # Stripped text of every node.
        def map_text(node)
          node.map(&:text).map(&:strip)
        end

        # Every node serialised through to_s.
        def map_html(node)
          node.map(&:to_s)
        end

        # Inner HTML of every node as Strings.
        def map_inner_html(node)
          node.map(&:inner_html).map(&:to_s)
        end

        # urlencode applied to every element.
        def map_urlencode(node)
          node.map { |url| urlencode(url) }
        end

        # Returns the parsed URI when +url+ is already valid; otherwise the
        # escaped String form of it.
        def urlencode(url)
          URI(url)
        rescue URI::InvalidURIError
          URL_ESCAPER.escape(url)
        end

        # Implements the attr_<name> and mapattr_<name> filters. Only the
        # leading prefix is removed, the rest is the attribute name.
        def method_missing(method_sym, *arguments, &block)
          name = method_sym.to_s
          if name.start_with?(MAP_ATTR_PREFIX)
            attribute = name[MAP_ATTR_PREFIX.length..-1]
            arguments.first.map { |n| n[attribute] }
          elsif name.start_with?(ATTR_PREFIX)
            attribute = name[ATTR_PREFIX.length..-1]
            arguments.first[attribute]
          else
            super
          end
        end

        # Keeps respond_to? in step with method_missing.
        def respond_to_missing?(method_sym, include_private = false)
          method_sym.to_s.start_with?(MAP_ATTR_PREFIX, ATTR_PREFIX) || super
        end
      end
    end
  end
end