web_scraper 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/web_scraper.rb +165 -0
  3. metadata +59 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a3e01f01d20813809f915bc0b97280f3d35c2153
4
+ data.tar.gz: e73cf357e0151f499414ba21ce178a8724271e9c
5
+ SHA512:
6
+ metadata.gz: d38b93e448b86791f93918226d9dbadd6304667b4b7a2c1ef794802eb47cbc21d98331d2af86ccebb0d5011c36d0578bb861a7e8663c91e832c77a2356d0ba8d
7
+ data.tar.gz: 4c5213c3d410bb89ad093da6fc9f578c637e6bade5dbf94b5d4e93a891c1b94ef9ce6dbb9d35197bd0b728bb11e20076d3a21837e18120e6660a60d157cc150b
@@ -0,0 +1,165 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+
4
# Declarative HTML scraper. Subclasses describe a page with the class-level
# DSL (+resource+, +base+, +property+, +key+); every node matched by the
# +base+ selector is wrapped in an instance whose declared properties are
# readable as ordinary methods.
#
#   class Article < WebScraper
#     resource 'http://example.com/articles'
#     base css: '.article'
#     property :title, css: 'h1'
#     property views: :integer, css: '.views'
#     key :title
#   end
class WebScraper
  # Selector kinds accepted by +base+ and +property+; each is sent to the
  # node as a method name.
  SELECTOR_KINDS = [:css, :xpath].freeze

  # Value types a property may be declared with.
  PROPERTY_TYPES = [:string, :integer, :float, :node].freeze

  # Raised by +all+ (and everything built on it) when the scraper is not
  # fully configured.
  class ConfigurationError < RuntimeError
    # The default text is passed to the superclass so that +message+ and
    # +to_s+ agree.
    def initialize(msg = 'resource, base, properties and key should be defined')
      super
    end
  end

  # Raised by +resource+ when the argument is not a String.
  class ResourceDefentitionError < RuntimeError
    def initialize(msg = 'resource should be a string')
      super
    end
  end

  # Raised by +base+ when the argument is not a valid selector.
  class BaseDefentitionError < RuntimeError
    def initialize(msg = 'base should be a selector (:css|:xpath => String)')
      super
    end
  end

  # Raised by +property+ when the declaration is malformed.
  class PropertyDefentitionError < RuntimeError
    def initialize(msg = 'property is a name (with type optionally) ' +
                         'and a selector (:css|:xpath => String)')
      super
    end
  end

  # Raised by +key+ when the name is not a declared property.
  class KeyDefentitionError < RuntimeError
    def initialize(msg = 'key should be a name of a defined property')
      super
    end
  end

  class << self
    # Fetches the resource, applies the base selector and wraps every
    # matched node in an instance. The result is memoized in @all until
    # +expire+ is called.
    #
    # Raises ConfigurationError unless +valid?+.
    def all
      raise ConfigurationError unless valid?

      @all ||= parse_resource.public_send(*_base).map { |node| new(node) }
    end

    # Number of scraped items.
    def count
      all.size
    end

    # Drops the memoized result so the next +all+ fetches again.
    def expire
      @all = nil
    end

    # Returns the first item whose key property equals +key+, or nil.
    def find(key)
      all.find { |item| item.public_send(_key) == key }
    end

    # Sets the location of the document to scrape.
    #
    # Raises ResourceDefentitionError unless given a String.
    def resource(_resource)
      raise ResourceDefentitionError unless _resource.is_a? String

      @_resource = _resource
    end

    attr_reader :_resource

    # Sets the selector that yields one node per item, e.g. css: '.row'.
    # Stored as a [kind, expression] pair.
    #
    # Raises BaseDefentitionError unless given a valid selector.
    def base(_base)
      raise BaseDefentitionError unless valid_selector? _base

      @_base = _base.to_a.flatten
    end

    attr_reader :_base

    # Declares a property. Two forms are accepted:
    #
    #   property :title, css: 'h1'               # type defaults to :string
    #   property views: :integer, css: '.views'  # explicit type
    #
    # Raises PropertyDefentitionError for any other shape.
    def property(*args)
      @properties ||= {}

      case args.length
      when 1
        params = args.first
        raise PropertyDefentitionError unless params.is_a? Hash

        # Split the single hash into the selector pair and the name => type
        # pair.
        selector = params.select { |k, _v| SELECTOR_KINDS.include? k }
        info = params.reject { |k, _v| SELECTOR_KINDS.include? k }
      when 2
        name, selector = args
        info = { name => :string }
      else
        raise PropertyDefentitionError
      end

      raise PropertyDefentitionError unless valid_selector? selector
      raise PropertyDefentitionError unless valid_info? info

      name, type = info.first

      @properties[name] = { type: type, selector: selector.to_a.flatten }
    end

    attr_reader :properties

    # Chooses which declared property +find+ compares against.
    #
    # Raises KeyDefentitionError when the name is not a declared property,
    # including the case where no property has been declared yet.
    def key(_key)
      raise KeyDefentitionError unless properties && properties.key?(_key)

      @_key = _key
    end

    attr_reader :_key

    # Truthy once resource, base and key are all set. A key can only be set
    # after a property exists, so properties are covered implicitly.
    def valid?
      _resource && _base && _key
    end

    # True for a one-entry Hash of the form { :css | :xpath => String }.
    def valid_selector?(selector)
      selector.is_a?(Hash) &&
        selector.size == 1 &&
        SELECTOR_KINDS.include?(selector.keys.first) &&
        selector.values.first.is_a?(String)
    end

    # True for a one-entry Hash of the form { Symbol => known type }.
    def valid_info?(info)
      info.is_a?(Hash) &&
        info.size == 1 &&
        info.keys.first.is_a?(Symbol) &&
        PROPERTY_TYPES.include?(info.values.first)
    end

    # Opens the resource and parses it with Nokogiri. The block form closes
    # the handle once parsing is done. URI.open is preferred where it is
    # available; bare Kernel#open is kept only as a fallback for Rubies
    # whose open-uri does not provide URI.open.
    def parse_resource
      if URI.respond_to?(:open)
        URI.open(_resource) { |io| Nokogiri::HTML(io) }
      else
        open(_resource) { |io| Nokogiri::HTML(io) }
      end
    end

    # Instances are only built by +all+.
    private :new, :parse_resource
  end

  # Wraps one node matched by the base selector.
  def initialize(node)
    @node = node
  end

  attr_reader :node

  # Delegates a CSS query to the wrapped node.
  def css(*args)
    node.css(*args)
  end

  # Delegates an XPath query to the wrapped node.
  def xpath(*args)
    node.xpath(*args)
  end

  # Resolves declared properties: runs the property's selector against the
  # wrapped node and converts the result according to the declared type.
  # Any other name falls through to the default NoMethodError.
  def method_missing(name, *args, &block)
    property = property_definition(name)
    return super unless property

    value = @node.public_send(*property[:selector])

    case property[:type]
    when :string then value.text.strip
    when :integer then value.text.to_i
    when :float then value.text.to_f
    when :node then value
    end
  end

  # Keeps +respond_to?+ in agreement with +method_missing+.
  def respond_to_missing?(name, include_private = false)
    !property_definition(name).nil? || super
  end

  private

  # Returns the declaration hash for +name+, or nil when the class has no
  # such property (or no properties at all).
  def property_definition(name)
    definitions = self.class.properties
    definitions && definitions[name]
  end
end
165
+
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: web_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Speransky Danil
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-03-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description: ''
28
+ email: speranskydanil@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/web_scraper.rb
34
+ homepage: http://speranskydanil.github.io/web_scraper/
35
+ licenses:
36
+ - MIT
37
+ metadata: {}
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 2.2.2
55
+ signing_key:
56
+ specification_version: 4
57
+ summary: ''
58
+ test_files: []
59
+ has_rdoc: