web_scraper 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/web_scraper.rb +165 -0
  3. metadata +59 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a3e01f01d20813809f915bc0b97280f3d35c2153
4
+ data.tar.gz: e73cf357e0151f499414ba21ce178a8724271e9c
5
+ SHA512:
6
+ metadata.gz: d38b93e448b86791f93918226d9dbadd6304667b4b7a2c1ef794802eb47cbc21d98331d2af86ccebb0d5011c36d0578bb861a7e8663c91e832c77a2356d0ba8d
7
+ data.tar.gz: 4c5213c3d410bb89ad093da6fc9f578c637e6bade5dbf94b5d4e93a891c1b94ef9ce6dbb9d35197bd0b728bb11e20076d3a21837e18120e6660a60d157cc150b
data/lib/web_scraper.rb ADDED
@@ -0,0 +1,165 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+
4
# Declarative scraper base class.
#
# A subclass describes a document with four class-level macros and then
# queries it:
#
#   class Article < WebScraper
#     resource 'http://example.com/articles'
#     base css: '.article'
#     property :title, css: 'h1'
#     property views: :integer, css: '.views'
#     key :title
#   end
#
#   Article.all            # one instance per node matched by +base+
#   Article.find('Hello')  # instance whose key property equals 'Hello'
#
# Configuration is stored in class-level instance variables, so every
# subclass carries its own resource, base, properties and key.
class WebScraper
  # Selector kinds accepted by +base+ and +property+.
  SELECTOR_TYPES = [:css, :xpath].freeze

  # Value types a property may be declared with.
  PROPERTY_TYPES = [:string, :integer, :float, :node].freeze

  # Raised by +all+ (and therefore +count+ / +find+) when the class has not
  # been fully configured.
  class ConfigurationError < RuntimeError
    def initialize(msg = 'resource, base, properties and key should be defined')
      super
    end
  end

  # Raised by +resource+ when the argument is not a String.
  class ResourceDefentitionError < RuntimeError
    def initialize(msg = 'resource should be a string')
      super
    end
  end

  # Raised by +base+ when the argument is not a valid selector.
  class BaseDefentitionError < RuntimeError
    def initialize(msg = 'base should be a selector (:css|:xpath => String)')
      super
    end
  end

  # Raised by +property+ when the arguments do not describe a property.
  class PropertyDefentitionError < RuntimeError
    def initialize(msg = 'property is a name (with type optionally) ' \
                         'and a selector (:css|:xpath => String)')
      super
    end
  end

  # Raised by +key+ when the name is not a previously defined property.
  class KeyDefentitionError < RuntimeError
    def initialize(msg = 'key should be a name of a defined property')
      super
    end
  end

  class << self
    attr_reader :_resource, :_base, :_key, :properties

    # Loads the resource, applies the base selector and wraps every matched
    # node in an instance. The result is memoized until +expire+ is called.
    #
    # @return [Array<WebScraper>]
    # @raise [ConfigurationError] if resource, base or key is missing
    def all
      raise ConfigurationError unless valid?

      @all ||= fetch_document.send(*_base).map { |node| new(node) }
    end

    # @return [Integer] number of items returned by +all+
    def count
      all.size
    end

    # Drops the memoized result so the next +all+ reloads the resource.
    def expire
      @all = nil
    end

    # @param key [Object] value compared (with ==) against each item's key property
    # @return [WebScraper, nil] first matching item, or nil when none matches
    def find(key)
      all.find { |item| item.send(_key) == key }
    end

    # Sets the location handed to +open+ when the document is loaded.
    #
    # @param _resource [String]
    # @raise [ResourceDefentitionError] unless a String is given
    def resource(_resource)
      raise ResourceDefentitionError unless _resource.is_a? String

      @_resource = _resource
    end

    # Sets the selector that yields one node per item.
    #
    # @param _base [Hash] single-pair hash, :css or :xpath => String
    # @raise [BaseDefentitionError] unless the selector is valid
    def base(_base)
      raise BaseDefentitionError unless valid_selector? _base

      @_base = _base.to_a.flatten
    end

    # Declares a property. Two call shapes are accepted:
    #
    #   property :title, css: 'h1'            # type defaults to :string
    #   property views: :integer, css: '.v'   # explicit type
    #
    # @raise [PropertyDefentitionError] on any other shape, an invalid
    #   selector, a non-Symbol name or an unknown type
    def property(*args)
      @properties ||= {}

      case args.length
      when 1
        params = args.first
        raise PropertyDefentitionError unless params.is_a? Hash

        # Split the single hash into the selector pair and the name => type pair.
        selector, info = params.partition { |k, _| SELECTOR_TYPES.include? k }
                               .map { |pairs| Hash[pairs] }
      when 2
        name, selector = args
        info = { name => :string }
      else
        raise PropertyDefentitionError
      end

      raise PropertyDefentitionError unless valid_selector? selector
      raise PropertyDefentitionError unless valid_info? info

      name, type = info.first

      @properties[name] = { type: type, selector: selector.to_a.flatten }
    end

    # Chooses which property +find+ compares against.
    #
    # @param _key [Symbol] name of an already declared property
    # @raise [KeyDefentitionError] if no such property exists (including the
    #   case where no property has been declared at all)
    def key(_key)
      raise KeyDefentitionError unless properties && properties.key?(_key)

      @_key = _key
    end

    # @return [Object] truthy when resource, base and key are all set.
    #   A key can only be set after a property exists, so a truthy result
    #   also implies at least one property.
    def valid?
      _resource && _base && _key
    end

    # @return [Boolean] true for a one-pair Hash of :css/:xpath => String
    def valid_selector?(selector)
      selector.is_a?(Hash) &&
        selector.size == 1 &&
        SELECTOR_TYPES.include?(selector.keys.first) &&
        selector.values.first.is_a?(String)
    end

    # @return [Boolean] true for a one-pair Hash of Symbol => known type
    def valid_info?(info)
      info.is_a?(Hash) &&
        info.size == 1 &&
        info.keys.first.is_a?(Symbol) &&
        PROPERTY_TYPES.include?(info.values.first)
    end

    # Instances are only meant to be built by +all+.
    private :new

    private

    # Opens the resource and parses it with Nokogiri. URI.open is preferred
    # because Kernel#open no longer handles URLs via open-uri on Ruby 3.0+;
    # the plain +open+ fallback keeps older Rubies working. The block form
    # closes the handle once parsing is done.
    def fetch_document
      parse = ->(io) { Nokogiri::HTML(io) }

      if URI.respond_to?(:open)
        URI.open(_resource, &parse)
      else
        open(_resource, &parse)
      end
    end
  end

  # @param node [Object] the matched node this item wraps
  def initialize(node)
    @node = node
  end

  attr_reader :node

  # Forwards to the wrapped node's +css+.
  def css(*args)
    node.css(*args)
  end

  # Forwards to the wrapped node's +xpath+.
  def xpath(*args)
    node.xpath(*args)
  end

  # Resolves declared properties: runs the property's selector against the
  # node and converts the result according to the declared type. Any other
  # name falls through to the default behaviour.
  def method_missing(name, *args, &block)
    property = declared_properties[name]
    return super unless property

    value = @node.send(*property[:selector])

    case property[:type]
    when :string  then value.text.strip
    when :integer then value.text.to_i
    when :float   then value.text.to_f
    when :node    then value
    end
  end

  # Keeps +respond_to?+ in step with +method_missing+.
  def respond_to_missing?(name, include_private = false)
    declared_properties.key?(name) || super
  end

  private

  # Properties declared on the class, or an empty Hash when none were.
  def declared_properties
    self.class.properties || {}
  end
end
165
+
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: web_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Speransky Danil
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-03-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description: ''
28
+ email: speranskydanil@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/web_scraper.rb
34
+ homepage: http://speranskydanil.github.io/web_scraper/
35
+ licenses:
36
+ - MIT
37
+ metadata: {}
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 2.2.2
55
+ signing_key:
56
+ specification_version: 4
57
+ summary: ''
58
+ test_files: []
59
+ has_rdoc: