botz 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/botz.rb +2 -0
- data/lib/botz/binder.rb +19 -12
- data/lib/botz/connector.rb +1 -1
- data/lib/botz/connector/html.rb +6 -12
- data/lib/botz/connector/xml.rb +31 -0
- data/lib/botz/definition_file.rb +6 -0
- data/lib/botz/result.rb +3 -3
- data/lib/botz/shell.rb +3 -1
- data/lib/botz/version.rb +1 -1
- metadata +3 -3
- data/lib/botz/connector/direct_html.rb +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5d5f01d8ad6efa00d67e23c3f7252151685ce114f8519b8b93fdbe1ce05f42e
|
4
|
+
data.tar.gz: c11d70d862a38a7526e630d19e00f84564c894ca7c57ddb761712bdffe68afe6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 256330ef0de6b6106dacd0a9d87bc28ed0bf051a1e5f09be5e3d054709744541f60c5699fbbc8abd311942a2136a209044ca9e1a00937e1dcba3b7f5db117d0f
|
7
|
+
data.tar.gz: 33a770696b537a3ffc3a61486d4cf758f80829a31b3b791ee588ce9cd0d59aca1008de7722897cae3a5eafa9e2e9e9a12763dc9f46cfc6f0ac98127409924adf
|
data/Gemfile.lock
CHANGED
data/lib/botz.rb
CHANGED
data/lib/botz/binder.rb
CHANGED
@@ -4,16 +4,13 @@
|
|
4
4
|
# Bind resource received from the connection to the result object
|
5
5
|
#
|
6
6
|
class Botz::Binder
|
7
|
-
class_attribute :field_names, default: []
|
8
|
-
attr_reader :resource
|
9
|
-
|
10
7
|
#
|
11
8
|
# binding multiple
|
12
9
|
#
|
13
10
|
class Multiple
|
14
11
|
def self.bind(connector:, binder:, query:, block:)
|
15
12
|
multiple_binding_class = self
|
16
|
-
connector.field(binder, query) do |elements|
|
13
|
+
connector.field.call(binder, query) do |elements|
|
17
14
|
multiple_binding_class.new(binder.class).instance_exec(elements, &block)
|
18
15
|
end
|
19
16
|
end
|
@@ -31,32 +28,42 @@ class Botz::Binder
|
|
31
28
|
end
|
32
29
|
end
|
33
30
|
|
31
|
+
class_attribute :field_names, default: []
|
32
|
+
attr_reader :resource
|
33
|
+
|
34
34
|
def initialize(resource)
|
35
35
|
@resource = resource
|
36
36
|
self.class.fields_call(self)
|
37
37
|
end
|
38
38
|
|
39
39
|
def result
|
40
|
-
|
40
|
+
definition = self
|
41
|
+
fetched_at = Time.current
|
42
|
+
result = self.class.result_class.new(fetched_at: fetched_at, fetched_on: fetched_at.beginning_of_day, **attributes)
|
43
|
+
result.define_singleton_method(:resource) { definition.resource }
|
44
|
+
result
|
41
45
|
end
|
42
46
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
47
|
+
def attributes_to_array
|
48
|
+
field_names.map { |field_name| send(field_name) }
|
49
|
+
end
|
50
|
+
|
51
|
+
def attributes
|
52
|
+
field_names.map { |field_name| [field_name, send(field_name)] }.to_h
|
46
53
|
end
|
47
54
|
|
48
55
|
def self.query(name, query = nil, &block)
|
49
56
|
define_method(name) do
|
50
|
-
connector.field(self, query, &block)
|
57
|
+
connector.field.call(self, query, &block)
|
51
58
|
end
|
52
59
|
end
|
53
60
|
|
54
|
-
def self.field(name, query = nil, &block)
|
61
|
+
def self.field(name, query = nil, optional: false, &block)
|
55
62
|
field_names << name
|
56
63
|
field_names.uniq!
|
57
|
-
result_class.define(name)
|
64
|
+
result_class.define(name, presence: !optional)
|
58
65
|
define_method(name) do
|
59
|
-
connector.field(self, query, &block)
|
66
|
+
connector.field.call(self, query, &block)
|
60
67
|
end
|
61
68
|
end
|
62
69
|
|
data/lib/botz/connector.rb
CHANGED
data/lib/botz/connector/html.rb
CHANGED
@@ -4,19 +4,13 @@
|
|
4
4
|
# Mechanize wrapper
|
5
5
|
#
|
6
6
|
class Botz::Connector::Html
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
def field(object, query, &block)
|
12
|
-
node = object.resource.search(query)
|
13
|
-
fail "Could not be located #{query}" if node.nil?
|
14
|
-
return node.first.text if block.nil?
|
7
|
+
class_attribute :field, default: (lambda { |object, query, &block|
|
8
|
+
node = object.resource.search(query)
|
9
|
+
fail "Could not be located #{query}" if node.nil?
|
10
|
+
return node.first.text if block.nil?
|
15
11
|
|
16
|
-
|
17
|
-
|
18
|
-
end
|
19
|
-
include Field
|
12
|
+
object.instance_exec(node, &block)
|
13
|
+
})
|
20
14
|
|
21
15
|
USER_AGENT = [
|
22
16
|
'Mozilla/5.0',
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# xml
|
5
|
+
#
|
6
|
+
class Botz::Connector::Xml
|
7
|
+
class_attribute :field, default: (lambda { |object, query, optional: false, &block|
|
8
|
+
return object.instance_exec(object.resource, &block) if query.nil?
|
9
|
+
|
10
|
+
node = object.resource.search(query)
|
11
|
+
return if optional && node.empty?
|
12
|
+
|
13
|
+
fail "Could not be located #{query}" if node.empty?
|
14
|
+
return node.first.text if block.nil?
|
15
|
+
|
16
|
+
object.instance_exec(node, &block)
|
17
|
+
})
|
18
|
+
|
19
|
+
def initialize(start_url: nil, encoding: nil)
|
20
|
+
@start_url = start_url
|
21
|
+
@encoding = encoding
|
22
|
+
end
|
23
|
+
|
24
|
+
def call(url = @start_url)
|
25
|
+
fail 'URL is undefined' if url.blank?
|
26
|
+
|
27
|
+
xml =
|
28
|
+
Nokogiri::XML(OpenURI.open_uri(url).read.gsub(/[\x00-\x09\x0B\x0C\x0E-\x1F\x7F]/, ''))
|
29
|
+
yield xml
|
30
|
+
end
|
31
|
+
end
|
data/lib/botz/definition_file.rb
CHANGED
@@ -8,6 +8,12 @@ class Botz::DefinitionFile
|
|
8
8
|
attr_reader :definition
|
9
9
|
delegate :spiders, :scrapers, :output, to: :definition
|
10
10
|
|
11
|
+
CSV = lambda do |result|
|
12
|
+
::CSV.generate do |csv|
|
13
|
+
csv << result.definition.attributes_to_array
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
11
17
|
def self.open(filepath)
|
12
18
|
object = new(filepath)
|
13
19
|
object.eval_definition
|
data/lib/botz/result.rb
CHANGED
@@ -7,14 +7,14 @@ class Botz::Result
|
|
7
7
|
include ActiveModel::Model
|
8
8
|
include ActiveModel::Attributes
|
9
9
|
|
10
|
-
def self.define(name)
|
10
|
+
def self.define(name, presence: true)
|
11
11
|
case name
|
12
12
|
when /.*\?/
|
13
13
|
attribute name, :boolean
|
14
|
-
validates name, inclusion: { in: [true, false] }
|
14
|
+
validates name, inclusion: { in: [true, false] } if presence
|
15
15
|
else
|
16
16
|
attribute name
|
17
|
-
validates name, presence: true, allow_blank: true
|
17
|
+
validates name, presence: true, allow_blank: true if presence
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
data/lib/botz/shell.rb
CHANGED
@@ -14,12 +14,14 @@ class Botz::Shell
|
|
14
14
|
# rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
|
15
15
|
def scraper(name)
|
16
16
|
command = scrapers[name.to_sym]
|
17
|
+
fail "undefined commmand[#{name}]" if command.nil?
|
18
|
+
|
17
19
|
while line = STDIN.gets
|
18
20
|
url = line.strip
|
19
21
|
begin
|
20
22
|
command.call(url, &definition_file.output)
|
21
23
|
rescue => e
|
22
|
-
STDERR.puts "ERROR #{e}"
|
24
|
+
STDERR.puts "ERROR #{url}: #{e}\n#{e.backtrace}"
|
23
25
|
end
|
24
26
|
end
|
25
27
|
end
|
data/lib/botz/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: botz
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -150,8 +150,8 @@ files:
|
|
150
150
|
- lib/botz.rb
|
151
151
|
- lib/botz/binder.rb
|
152
152
|
- lib/botz/connector.rb
|
153
|
-
- lib/botz/connector/direct_html.rb
|
154
153
|
- lib/botz/connector/html.rb
|
154
|
+
- lib/botz/connector/xml.rb
|
155
155
|
- lib/botz/console.rb
|
156
156
|
- lib/botz/definition.rb
|
157
157
|
- lib/botz/definition_file.rb
|
@@ -1,16 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
#
|
4
|
-
# Nokogiri wrapper
|
5
|
-
#
|
6
|
-
class Botz::Connector::DirectHtml
|
7
|
-
include ::Botz::Connector::Html::Field
|
8
|
-
|
9
|
-
def initialize(encoding: nil)
|
10
|
-
@encoding = encoding
|
11
|
-
end
|
12
|
-
|
13
|
-
def call(html)
|
14
|
-
yield Nokogiri::HTML.parse(html)
|
15
|
-
end
|
16
|
-
end
|