botz 0.6.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8dc85ac7df0f64087255c2aa722ba3e6151baca615ce00d9ce3c7cebd022f231
4
- data.tar.gz: 7f77369c8e744b050b2b08273b35f1e95e299d8f8adad3e7760e41f920a10af9
3
+ metadata.gz: c5d5f01d8ad6efa00d67e23c3f7252151685ce114f8519b8b93fdbe1ce05f42e
4
+ data.tar.gz: c11d70d862a38a7526e630d19e00f84564c894ca7c57ddb761712bdffe68afe6
5
5
  SHA512:
6
- metadata.gz: 4d7f5be597513d01464b23d6053d037b014008735745cb267d9299581efd8f11b1c5109929490b80cfbfee850c88202a3f0d298341a192cfe1f96212c72d8eee
7
- data.tar.gz: 2f885e634ed4fe24ebd642bd65cf7c7c059eb1ec31d7a4e842cec0663527ba1da8366f719ded80a9e43346de8aa98e6f23564cde7f8e01265684bd44fbf5d5b5
6
+ metadata.gz: 256330ef0de6b6106dacd0a9d87bc28ed0bf051a1e5f09be5e3d054709744541f60c5699fbbc8abd311942a2136a209044ca9e1a00937e1dcba3b7f5db117d0f
7
+ data.tar.gz: 33a770696b537a3ffc3a61486d4cf758f80829a31b3b791ee588ce9cd0d59aca1008de7722897cae3a5eafa9e2e9e9a12763dc9f46cfc6f0ac98127409924adf
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- botz (0.5.0)
4
+ botz (0.7.0)
5
5
  activemodel (~> 5.2)
6
6
  activesupport (~> 5.2)
7
7
  mechanize
data/lib/botz.rb CHANGED
@@ -4,6 +4,8 @@ require 'botz/version'
4
4
  require 'active_support/all'
5
5
  require 'active_model'
6
6
  require 'mechanize'
7
+ require 'csv'
8
+ require 'open-uri'
7
9
 
8
10
  #
9
11
  # web bot dsl engine
data/lib/botz/binder.rb CHANGED
@@ -4,16 +4,13 @@
4
4
  # Bind resource received from the connection to the result object
5
5
  #
6
6
  class Botz::Binder
7
- class_attribute :field_names, default: []
8
- attr_reader :resource
9
-
10
7
  #
11
8
  # binding multiple
12
9
  #
13
10
  class Multiple
14
11
  def self.bind(connector:, binder:, query:, block:)
15
12
  multiple_binding_class = self
16
- connector.field(binder, query) do |elements|
13
+ connector.field.call(binder, query) do |elements|
17
14
  multiple_binding_class.new(binder.class).instance_exec(elements, &block)
18
15
  end
19
16
  end
@@ -31,32 +28,42 @@ class Botz::Binder
31
28
  end
32
29
  end
33
30
 
31
+ class_attribute :field_names, default: []
32
+ attr_reader :resource
33
+
34
34
  def initialize(resource)
35
35
  @resource = resource
36
36
  self.class.fields_call(self)
37
37
  end
38
38
 
39
39
  def result
40
- new_result(field_names.map { |field_name| [field_name, send(field_name)] }.to_h)
40
+ definition = self
41
+ fetched_at = Time.current
42
+ result = self.class.result_class.new(fetched_at: fetched_at, fetched_on: fetched_at.beginning_of_day, **attributes)
43
+ result.define_singleton_method(:resource) { definition.resource }
44
+ result
41
45
  end
42
46
 
43
- def new_result(values)
44
- fetched_at = Time.current
45
- self.class.result_class.new(fetched_at: fetched_at, fetched_on: fetched_at.beginning_of_day, **values)
47
+ def attributes_to_array
48
+ field_names.map { |field_name| send(field_name) }
49
+ end
50
+
51
+ def attributes
52
+ field_names.map { |field_name| [field_name, send(field_name)] }.to_h
46
53
  end
47
54
 
48
55
  def self.query(name, query = nil, &block)
49
56
  define_method(name) do
50
- connector.field(self, query, &block)
57
+ connector.field.call(self, query, &block)
51
58
  end
52
59
  end
53
60
 
54
- def self.field(name, query = nil, &block)
61
+ def self.field(name, query = nil, optional: false, &block)
55
62
  field_names << name
56
63
  field_names.uniq!
57
- result_class.define(name)
64
+ result_class.define(name, presence: !optional)
58
65
  define_method(name) do
59
- connector.field(self, query, &block)
66
+ connector.field.call(self, query, &block)
60
67
  end
61
68
  end
62
69
 
@@ -6,5 +6,5 @@
6
6
  module Botz::Connector
7
7
  extend ActiveSupport::Autoload
8
8
  autoload :Html
9
- autoload :DirectHtml
9
+ autoload :Xml
10
10
  end
@@ -4,19 +4,13 @@
4
4
  # Mechanize wrapper
5
5
  #
6
6
  class Botz::Connector::Html
7
- #
8
- # field macro
9
- #
10
- module Field
11
- def field(object, query, &block)
12
- node = object.resource.search(query)
13
- fail "Could not be located #{query}" if node.nil?
14
- return node.first.text if block.nil?
7
+ class_attribute :field, default: (lambda { |object, query, &block|
8
+ node = object.resource.search(query)
9
+ fail "Could not be located #{query}" if node.nil?
10
+ return node.first.text if block.nil?
15
11
 
16
- object.instance_exec(node, &block)
17
- end
18
- end
19
- include Field
12
+ object.instance_exec(node, &block)
13
+ })
20
14
 
21
15
  USER_AGENT = [
22
16
  'Mozilla/5.0',
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # xml
5
+ #
6
+ class Botz::Connector::Xml
7
+ class_attribute :field, default: (lambda { |object, query, optional: false, &block|
8
+ return object.instance_exec(object.resource, &block) if query.nil?
9
+
10
+ node = object.resource.search(query)
11
+ return if optional && node.empty?
12
+
13
+ fail "Could not be located #{query}" if node.empty?
14
+ return node.first.text if block.nil?
15
+
16
+ object.instance_exec(node, &block)
17
+ })
18
+
19
+ def initialize(start_url: nil, encoding: nil)
20
+ @start_url = start_url
21
+ @encoding = encoding
22
+ end
23
+
24
+ def call(url = @start_url)
25
+ fail 'URL is undefined' if url.blank?
26
+
27
+ xml =
28
+ Nokogiri::XML(OpenURI.open_uri(url).read.gsub(/[\x00-\x09\x0B\x0C\x0E-\x1F\x7F]/, ''))
29
+ yield xml
30
+ end
31
+ end
@@ -8,6 +8,12 @@ class Botz::DefinitionFile
8
8
  attr_reader :definition
9
9
  delegate :spiders, :scrapers, :output, to: :definition
10
10
 
11
+ CSV = lambda do |result|
12
+ ::CSV.generate do |csv|
13
+ csv << result.definition.attributes_to_array
14
+ end
15
+ end
16
+
11
17
  def self.open(filepath)
12
18
  object = new(filepath)
13
19
  object.eval_definition
data/lib/botz/result.rb CHANGED
@@ -7,14 +7,14 @@ class Botz::Result
7
7
  include ActiveModel::Model
8
8
  include ActiveModel::Attributes
9
9
 
10
- def self.define(name)
10
+ def self.define(name, presence: true)
11
11
  case name
12
12
  when /.*\?/
13
13
  attribute name, :boolean
14
- validates name, inclusion: { in: [true, false] }
14
+ validates name, inclusion: { in: [true, false] } if presence
15
15
  else
16
16
  attribute name
17
- validates name, presence: true, allow_blank: true
17
+ validates name, presence: true, allow_blank: true if presence
18
18
  end
19
19
  end
20
20
 
data/lib/botz/shell.rb CHANGED
@@ -14,12 +14,14 @@ class Botz::Shell
14
14
  # rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
15
15
  def scraper(name)
16
16
  command = scrapers[name.to_sym]
17
+ fail "undefined commmand[#{name}]" if command.nil?
18
+
17
19
  while line = STDIN.gets
18
20
  url = line.strip
19
21
  begin
20
22
  command.call(url, &definition_file.output)
21
23
  rescue => e
22
- STDERR.puts "ERROR #{e}"
24
+ STDERR.puts "ERROR #{url}: #{e}\n#{e.backtrace}"
23
25
  end
24
26
  end
25
27
  end
data/lib/botz/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Botz
4
- VERSION = '0.6.0'
4
+ VERSION = '0.7.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: botz
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - aileron
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-16 00:00:00.000000000 Z
11
+ date: 2019-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -150,8 +150,8 @@ files:
150
150
  - lib/botz.rb
151
151
  - lib/botz/binder.rb
152
152
  - lib/botz/connector.rb
153
- - lib/botz/connector/direct_html.rb
154
153
  - lib/botz/connector/html.rb
154
+ - lib/botz/connector/xml.rb
155
155
  - lib/botz/console.rb
156
156
  - lib/botz/definition.rb
157
157
  - lib/botz/definition_file.rb
@@ -1,16 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- #
4
- # Nokogiri wrapper
5
- #
6
- class Botz::Connector::DirectHtml
7
- include ::Botz::Connector::Html::Field
8
-
9
- def initialize(encoding: nil)
10
- @encoding = encoding
11
- end
12
-
13
- def call(html)
14
- yield Nokogiri::HTML.parse(html)
15
- end
16
- end