botz 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8dc85ac7df0f64087255c2aa722ba3e6151baca615ce00d9ce3c7cebd022f231
4
- data.tar.gz: 7f77369c8e744b050b2b08273b35f1e95e299d8f8adad3e7760e41f920a10af9
3
+ metadata.gz: c5d5f01d8ad6efa00d67e23c3f7252151685ce114f8519b8b93fdbe1ce05f42e
4
+ data.tar.gz: c11d70d862a38a7526e630d19e00f84564c894ca7c57ddb761712bdffe68afe6
5
5
  SHA512:
6
- metadata.gz: 4d7f5be597513d01464b23d6053d037b014008735745cb267d9299581efd8f11b1c5109929490b80cfbfee850c88202a3f0d298341a192cfe1f96212c72d8eee
7
- data.tar.gz: 2f885e634ed4fe24ebd642bd65cf7c7c059eb1ec31d7a4e842cec0663527ba1da8366f719ded80a9e43346de8aa98e6f23564cde7f8e01265684bd44fbf5d5b5
6
+ metadata.gz: 256330ef0de6b6106dacd0a9d87bc28ed0bf051a1e5f09be5e3d054709744541f60c5699fbbc8abd311942a2136a209044ca9e1a00937e1dcba3b7f5db117d0f
7
+ data.tar.gz: 33a770696b537a3ffc3a61486d4cf758f80829a31b3b791ee588ce9cd0d59aca1008de7722897cae3a5eafa9e2e9e9a12763dc9f46cfc6f0ac98127409924adf
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- botz (0.5.0)
4
+ botz (0.7.0)
5
5
  activemodel (~> 5.2)
6
6
  activesupport (~> 5.2)
7
7
  mechanize
data/lib/botz.rb CHANGED
@@ -4,6 +4,8 @@ require 'botz/version'
4
4
  require 'active_support/all'
5
5
  require 'active_model'
6
6
  require 'mechanize'
7
+ require 'csv'
8
+ require 'open-uri'
7
9
 
8
10
  #
9
11
  # web bot dsl engine
data/lib/botz/binder.rb CHANGED
@@ -4,16 +4,13 @@
4
4
  # Bind resource received from the connection to the result object
5
5
  #
6
6
  class Botz::Binder
7
- class_attribute :field_names, default: []
8
- attr_reader :resource
9
-
10
7
  #
11
8
  # binding multiple
12
9
  #
13
10
  class Multiple
14
11
  def self.bind(connector:, binder:, query:, block:)
15
12
  multiple_binding_class = self
16
- connector.field(binder, query) do |elements|
13
+ connector.field.call(binder, query) do |elements|
17
14
  multiple_binding_class.new(binder.class).instance_exec(elements, &block)
18
15
  end
19
16
  end
@@ -31,32 +28,42 @@ class Botz::Binder
31
28
  end
32
29
  end
33
30
 
31
+ class_attribute :field_names, default: []
32
+ attr_reader :resource
33
+
34
34
  def initialize(resource)
35
35
  @resource = resource
36
36
  self.class.fields_call(self)
37
37
  end
38
38
 
39
39
  def result
40
- new_result(field_names.map { |field_name| [field_name, send(field_name)] }.to_h)
40
+ definition = self
41
+ fetched_at = Time.current
42
+ result = self.class.result_class.new(fetched_at: fetched_at, fetched_on: fetched_at.beginning_of_day, **attributes)
43
+ result.define_singleton_method(:resource) { definition.resource }
44
+ result
41
45
  end
42
46
 
43
- def new_result(values)
44
- fetched_at = Time.current
45
- self.class.result_class.new(fetched_at: fetched_at, fetched_on: fetched_at.beginning_of_day, **values)
47
+ def attributes_to_array
48
+ field_names.map { |field_name| send(field_name) }
49
+ end
50
+
51
+ def attributes
52
+ field_names.map { |field_name| [field_name, send(field_name)] }.to_h
46
53
  end
47
54
 
48
55
  def self.query(name, query = nil, &block)
49
56
  define_method(name) do
50
- connector.field(self, query, &block)
57
+ connector.field.call(self, query, &block)
51
58
  end
52
59
  end
53
60
 
54
- def self.field(name, query = nil, &block)
61
+ def self.field(name, query = nil, optional: false, &block)
55
62
  field_names << name
56
63
  field_names.uniq!
57
- result_class.define(name)
64
+ result_class.define(name, presence: !optional)
58
65
  define_method(name) do
59
- connector.field(self, query, &block)
66
+ connector.field.call(self, query, &block)
60
67
  end
61
68
  end
62
69
 
@@ -6,5 +6,5 @@
6
6
  module Botz::Connector
7
7
  extend ActiveSupport::Autoload
8
8
  autoload :Html
9
- autoload :DirectHtml
9
+ autoload :Xml
10
10
  end
@@ -4,19 +4,13 @@
4
4
  # Mechanize wrapper
5
5
  #
6
6
  class Botz::Connector::Html
7
- #
8
- # field macro
9
- #
10
- module Field
11
- def field(object, query, &block)
12
- node = object.resource.search(query)
13
- fail "Could not be located #{query}" if node.nil?
14
- return node.first.text if block.nil?
7
+ class_attribute :field, default: (lambda { |object, query, &block|
8
+ node = object.resource.search(query)
9
+ fail "Could not be located #{query}" if node.nil?
10
+ return node.first.text if block.nil?
15
11
 
16
- object.instance_exec(node, &block)
17
- end
18
- end
19
- include Field
12
+ object.instance_exec(node, &block)
13
+ })
20
14
 
21
15
  USER_AGENT = [
22
16
  'Mozilla/5.0',
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # xml
5
+ #
6
+ class Botz::Connector::Xml
7
+ class_attribute :field, default: (lambda { |object, query, optional: false, &block|
8
+ return object.instance_exec(object.resource, &block) if query.nil?
9
+
10
+ node = object.resource.search(query)
11
+ return if optional && node.empty?
12
+
13
+ fail "Could not be located #{query}" if node.empty?
14
+ return node.first.text if block.nil?
15
+
16
+ object.instance_exec(node, &block)
17
+ })
18
+
19
+ def initialize(start_url: nil, encoding: nil)
20
+ @start_url = start_url
21
+ @encoding = encoding
22
+ end
23
+
24
+ def call(url = @start_url)
25
+ fail 'URL is undefined' if url.blank?
26
+
27
+ xml =
28
+ Nokogiri::XML(OpenURI.open_uri(url).read.gsub(/[\x00-\x09\x0B\x0C\x0E-\x1F\x7F]/, ''))
29
+ yield xml
30
+ end
31
+ end
@@ -8,6 +8,12 @@ class Botz::DefinitionFile
8
8
  attr_reader :definition
9
9
  delegate :spiders, :scrapers, :output, to: :definition
10
10
 
11
+ CSV = lambda do |result|
12
+ ::CSV.generate do |csv|
13
+ csv << result.definition.attributes_to_array
14
+ end
15
+ end
16
+
11
17
  def self.open(filepath)
12
18
  object = new(filepath)
13
19
  object.eval_definition
data/lib/botz/result.rb CHANGED
@@ -7,14 +7,14 @@ class Botz::Result
7
7
  include ActiveModel::Model
8
8
  include ActiveModel::Attributes
9
9
 
10
- def self.define(name)
10
+ def self.define(name, presence: true)
11
11
  case name
12
12
  when /.*\?/
13
13
  attribute name, :boolean
14
- validates name, inclusion: { in: [true, false] }
14
+ validates name, inclusion: { in: [true, false] } if presence
15
15
  else
16
16
  attribute name
17
- validates name, presence: true, allow_blank: true
17
+ validates name, presence: true, allow_blank: true if presence
18
18
  end
19
19
  end
20
20
 
data/lib/botz/shell.rb CHANGED
@@ -14,12 +14,14 @@ class Botz::Shell
14
14
  # rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
15
15
  def scraper(name)
16
16
  command = scrapers[name.to_sym]
17
+ fail "undefined commmand[#{name}]" if command.nil?
18
+
17
19
  while line = STDIN.gets
18
20
  url = line.strip
19
21
  begin
20
22
  command.call(url, &definition_file.output)
21
23
  rescue => e
22
- STDERR.puts "ERROR #{e}"
24
+ STDERR.puts "ERROR #{url}: #{e}\n#{e.backtrace}"
23
25
  end
24
26
  end
25
27
  end
data/lib/botz/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Botz
4
- VERSION = '0.6.0'
4
+ VERSION = '0.7.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: botz
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - aileron
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-16 00:00:00.000000000 Z
11
+ date: 2019-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -150,8 +150,8 @@ files:
150
150
  - lib/botz.rb
151
151
  - lib/botz/binder.rb
152
152
  - lib/botz/connector.rb
153
- - lib/botz/connector/direct_html.rb
154
153
  - lib/botz/connector/html.rb
154
+ - lib/botz/connector/xml.rb
155
155
  - lib/botz/console.rb
156
156
  - lib/botz/definition.rb
157
157
  - lib/botz/definition_file.rb
@@ -1,16 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- #
4
- # Nokogiri wrapper
5
- #
6
- class Botz::Connector::DirectHtml
7
- include ::Botz::Connector::Html::Field
8
-
9
- def initialize(encoding: nil)
10
- @encoding = encoding
11
- end
12
-
13
- def call(html)
14
- yield Nokogiri::HTML.parse(html)
15
- end
16
- end