botz 0.6.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/botz.rb +2 -0
- data/lib/botz/binder.rb +19 -12
- data/lib/botz/connector.rb +1 -1
- data/lib/botz/connector/html.rb +6 -12
- data/lib/botz/connector/xml.rb +31 -0
- data/lib/botz/definition_file.rb +6 -0
- data/lib/botz/result.rb +3 -3
- data/lib/botz/shell.rb +3 -1
- data/lib/botz/version.rb +1 -1
- metadata +3 -3
- data/lib/botz/connector/direct_html.rb +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5d5f01d8ad6efa00d67e23c3f7252151685ce114f8519b8b93fdbe1ce05f42e
|
4
|
+
data.tar.gz: c11d70d862a38a7526e630d19e00f84564c894ca7c57ddb761712bdffe68afe6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 256330ef0de6b6106dacd0a9d87bc28ed0bf051a1e5f09be5e3d054709744541f60c5699fbbc8abd311942a2136a209044ca9e1a00937e1dcba3b7f5db117d0f
|
7
|
+
data.tar.gz: 33a770696b537a3ffc3a61486d4cf758f80829a31b3b791ee588ce9cd0d59aca1008de7722897cae3a5eafa9e2e9e9a12763dc9f46cfc6f0ac98127409924adf
|
data/Gemfile.lock
CHANGED
data/lib/botz.rb
CHANGED
data/lib/botz/binder.rb
CHANGED
@@ -4,16 +4,13 @@
|
|
4
4
|
# Bind resource received from the connection to the result object
|
5
5
|
#
|
6
6
|
class Botz::Binder
|
7
|
-
class_attribute :field_names, default: []
|
8
|
-
attr_reader :resource
|
9
|
-
|
10
7
|
#
|
11
8
|
# binding multiple
|
12
9
|
#
|
13
10
|
class Multiple
|
14
11
|
def self.bind(connector:, binder:, query:, block:)
|
15
12
|
multiple_binding_class = self
|
16
|
-
connector.field(binder, query) do |elements|
|
13
|
+
connector.field.call(binder, query) do |elements|
|
17
14
|
multiple_binding_class.new(binder.class).instance_exec(elements, &block)
|
18
15
|
end
|
19
16
|
end
|
@@ -31,32 +28,42 @@ class Botz::Binder
|
|
31
28
|
end
|
32
29
|
end
|
33
30
|
|
31
|
+
class_attribute :field_names, default: []
|
32
|
+
attr_reader :resource
|
33
|
+
|
34
34
|
def initialize(resource)
|
35
35
|
@resource = resource
|
36
36
|
self.class.fields_call(self)
|
37
37
|
end
|
38
38
|
|
39
39
|
def result
|
40
|
-
|
40
|
+
definition = self
|
41
|
+
fetched_at = Time.current
|
42
|
+
result = self.class.result_class.new(fetched_at: fetched_at, fetched_on: fetched_at.beginning_of_day, **attributes)
|
43
|
+
result.define_singleton_method(:resource) { definition.resource }
|
44
|
+
result
|
41
45
|
end
|
42
46
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
47
|
+
def attributes_to_array
|
48
|
+
field_names.map { |field_name| send(field_name) }
|
49
|
+
end
|
50
|
+
|
51
|
+
def attributes
|
52
|
+
field_names.map { |field_name| [field_name, send(field_name)] }.to_h
|
46
53
|
end
|
47
54
|
|
48
55
|
def self.query(name, query = nil, &block)
|
49
56
|
define_method(name) do
|
50
|
-
connector.field(self, query, &block)
|
57
|
+
connector.field.call(self, query, &block)
|
51
58
|
end
|
52
59
|
end
|
53
60
|
|
54
|
-
def self.field(name, query = nil, &block)
|
61
|
+
def self.field(name, query = nil, optional: false, &block)
|
55
62
|
field_names << name
|
56
63
|
field_names.uniq!
|
57
|
-
result_class.define(name)
|
64
|
+
result_class.define(name, presence: !optional)
|
58
65
|
define_method(name) do
|
59
|
-
connector.field(self, query, &block)
|
66
|
+
connector.field.call(self, query, &block)
|
60
67
|
end
|
61
68
|
end
|
62
69
|
|
data/lib/botz/connector.rb
CHANGED
data/lib/botz/connector/html.rb
CHANGED
@@ -4,19 +4,13 @@
|
|
4
4
|
# Mechanize wrapper
|
5
5
|
#
|
6
6
|
class Botz::Connector::Html
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
def field(object, query, &block)
|
12
|
-
node = object.resource.search(query)
|
13
|
-
fail "Could not be located #{query}" if node.nil?
|
14
|
-
return node.first.text if block.nil?
|
7
|
+
class_attribute :field, default: (lambda { |object, query, &block|
|
8
|
+
node = object.resource.search(query)
|
9
|
+
fail "Could not be located #{query}" if node.nil?
|
10
|
+
return node.first.text if block.nil?
|
15
11
|
|
16
|
-
|
17
|
-
|
18
|
-
end
|
19
|
-
include Field
|
12
|
+
object.instance_exec(node, &block)
|
13
|
+
})
|
20
14
|
|
21
15
|
USER_AGENT = [
|
22
16
|
'Mozilla/5.0',
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# xml
|
5
|
+
#
|
6
|
+
class Botz::Connector::Xml
|
7
|
+
class_attribute :field, default: (lambda { |object, query, optional: false, &block|
|
8
|
+
return object.instance_exec(object.resource, &block) if query.nil?
|
9
|
+
|
10
|
+
node = object.resource.search(query)
|
11
|
+
return if optional && node.empty?
|
12
|
+
|
13
|
+
fail "Could not be located #{query}" if node.empty?
|
14
|
+
return node.first.text if block.nil?
|
15
|
+
|
16
|
+
object.instance_exec(node, &block)
|
17
|
+
})
|
18
|
+
|
19
|
+
def initialize(start_url: nil, encoding: nil)
|
20
|
+
@start_url = start_url
|
21
|
+
@encoding = encoding
|
22
|
+
end
|
23
|
+
|
24
|
+
def call(url = @start_url)
|
25
|
+
fail 'URL is undefined' if url.blank?
|
26
|
+
|
27
|
+
xml =
|
28
|
+
Nokogiri::XML(OpenURI.open_uri(url).read.gsub(/[\x00-\x09\x0B\x0C\x0E-\x1F\x7F]/, ''))
|
29
|
+
yield xml
|
30
|
+
end
|
31
|
+
end
|
data/lib/botz/definition_file.rb
CHANGED
@@ -8,6 +8,12 @@ class Botz::DefinitionFile
|
|
8
8
|
attr_reader :definition
|
9
9
|
delegate :spiders, :scrapers, :output, to: :definition
|
10
10
|
|
11
|
+
CSV = lambda do |result|
|
12
|
+
::CSV.generate do |csv|
|
13
|
+
csv << result.definition.attributes_to_array
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
11
17
|
def self.open(filepath)
|
12
18
|
object = new(filepath)
|
13
19
|
object.eval_definition
|
data/lib/botz/result.rb
CHANGED
@@ -7,14 +7,14 @@ class Botz::Result
|
|
7
7
|
include ActiveModel::Model
|
8
8
|
include ActiveModel::Attributes
|
9
9
|
|
10
|
-
def self.define(name)
|
10
|
+
def self.define(name, presence: true)
|
11
11
|
case name
|
12
12
|
when /.*\?/
|
13
13
|
attribute name, :boolean
|
14
|
-
validates name, inclusion: { in: [true, false] }
|
14
|
+
validates name, inclusion: { in: [true, false] } if presence
|
15
15
|
else
|
16
16
|
attribute name
|
17
|
-
validates name, presence: true, allow_blank: true
|
17
|
+
validates name, presence: true, allow_blank: true if presence
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
data/lib/botz/shell.rb
CHANGED
@@ -14,12 +14,14 @@ class Botz::Shell
|
|
14
14
|
# rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
|
15
15
|
def scraper(name)
|
16
16
|
command = scrapers[name.to_sym]
|
17
|
+
fail "undefined commmand[#{name}]" if command.nil?
|
18
|
+
|
17
19
|
while line = STDIN.gets
|
18
20
|
url = line.strip
|
19
21
|
begin
|
20
22
|
command.call(url, &definition_file.output)
|
21
23
|
rescue => e
|
22
|
-
STDERR.puts "ERROR #{e}"
|
24
|
+
STDERR.puts "ERROR #{url}: #{e}\n#{e.backtrace}"
|
23
25
|
end
|
24
26
|
end
|
25
27
|
end
|
data/lib/botz/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: botz
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -150,8 +150,8 @@ files:
|
|
150
150
|
- lib/botz.rb
|
151
151
|
- lib/botz/binder.rb
|
152
152
|
- lib/botz/connector.rb
|
153
|
-
- lib/botz/connector/direct_html.rb
|
154
153
|
- lib/botz/connector/html.rb
|
154
|
+
- lib/botz/connector/xml.rb
|
155
155
|
- lib/botz/console.rb
|
156
156
|
- lib/botz/definition.rb
|
157
157
|
- lib/botz/definition_file.rb
|
@@ -1,16 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
#
|
4
|
-
# Nokogiri wrapper
|
5
|
-
#
|
6
|
-
class Botz::Connector::DirectHtml
|
7
|
-
include ::Botz::Connector::Html::Field
|
8
|
-
|
9
|
-
def initialize(encoding: nil)
|
10
|
-
@encoding = encoding
|
11
|
-
end
|
12
|
-
|
13
|
-
def call(html)
|
14
|
-
yield Nokogiri::HTML.parse(html)
|
15
|
-
end
|
16
|
-
end
|