botz 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e8f0f5bd0c1efc41fefee5be1ad905144c6c2af0e20c7163ec1ddac75b59b03c
4
- data.tar.gz: d1fd0bf8a10ae82a5ca51a81993143964ff20911573785a8cb5d1f3e8facbed5
3
+ metadata.gz: 9fc66e77b306d2024ecaf98a7de9cb68a4489a42d0ccd7435245b4d6a385161b
4
+ data.tar.gz: f42f5b708de3b5e52ea29947a5d08cc31bcb0dadd89dcaa69b71de9b5ad001a9
5
5
  SHA512:
6
- metadata.gz: 582acc3d2f94b021943d517ab62c77d42f759d8c197c4728e2a49ce4c4127f105ce0cbcb3a822e5fb7aefe8d90e8d6234d682e855cff5209d1c12d8f78f31d8d
7
- data.tar.gz: 43efe75756b292eb80b8acce890fd37f15845cc9868cca6c46946ab0008d947afe9abbbef23bc71256fb9c4c87ff79a809d81f8f6fcd178ee0add6117fa9290f
6
+ metadata.gz: 4e8c1e31edde612bcf993fd470fc4249bfd236ffd2e3640cca22f1098d22a5c7b0bfa9ade04f0263c94df5faa9901b4743dcc5cc7f343512c9b6e079711a78f4
7
+ data.tar.gz: 52e4dbb1fe4ac0f9b07654d012a51e3a17b86a8279fe99eb34bb445fbc9845589b8adce472a92f636609c938ef486a9b86f81f31a5831f7315afdabafca5a492
data/Gemfile.lock CHANGED
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- botz (0.1.0)
4
+ botz (0.2.0)
5
5
  activemodel (~> 5.2)
6
6
  activesupport (~> 5.2)
7
7
  mechanize
8
+ pry
8
9
 
9
10
  GEM
10
11
  remote: https://rubygems.org/
data/bin/console CHANGED
@@ -10,9 +10,9 @@ require 'botz'
10
10
  # (If you use this, don't forget to add pry to your Gemfile!)
11
11
  require 'pry'
12
12
  def reload!
13
- files = $LOADED_FEATURES.select { |feat| feat =~ %r{/gem_name/} }
14
- files.each { |file| load file }
15
- load 'lib/botz.rb'
13
+ ActiveSupport::Dependencies.clear
14
+ ActiveSupport::DescendantsTracker.clear
15
+ ActiveSupport::Reloader.reload!
16
16
  end
17
17
  Pry.start
18
18
 
data/botz.gemspec CHANGED
@@ -29,6 +29,7 @@ Gem::Specification.new do |spec|
29
29
  spec.add_development_dependency 'rake', '~> 10.0'
30
30
  spec.add_development_dependency 'rspec', '~> 3.0'
31
31
 
32
+ spec.add_runtime_dependency 'pry'
32
33
  spec.add_runtime_dependency 'activemodel', '~> 5.2'
33
34
  spec.add_runtime_dependency 'activesupport', '~> 5.2'
34
35
  spec.add_runtime_dependency 'mechanize'
data/exe/botz CHANGED
@@ -2,9 +2,15 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  require 'botz'
5
- shell = Botz.open(ARGV[0]).shell
6
- case ARGV[1]&.to_sym
7
- when :spider then shell.spider(ARGV[2])
8
- when :scraper then shell.scraper(ARGV[2])
9
- when :shell then shell.function
5
+
6
+ case ARGV[0]&.to_sym
7
+ when :spider then Botz.open(ARGV[1]).shell.spider(ARGV[2])
8
+ when :scraper then Botz.open(ARGV[1]).shell.scraper(ARGV[2])
9
+ when :shell then Botz.open(ARGV[1]).shell.function
10
+ when :console
11
+ if ARGV[1].blank?
12
+ Botz.console
13
+ else
14
+ Botz.open(ARGV[1]).console
15
+ end
10
16
  end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # botz console
5
+ #
6
+ class Botz::Console
7
+ attr_reader :definition_file
8
+ delegate :spiders, :scrapers, to: :definition_file
9
+
10
+ def initialize(definition_file = nil)
11
+ @definition_file = definition_file
12
+ end
13
+
14
+ def open(filepath)
15
+ @definition_file = Botz::DefinitionFile.open(filepath)
16
+ end
17
+
18
+ def reload!
19
+ @definition_file&.eval_definition
20
+ end
21
+
22
+ def scraper(name, url, &block)
23
+ scrapers[name.to_sym].call(url, &block)
24
+ end
25
+
26
+ def spider(name, url = nil, &block)
27
+ if url
28
+ spiders[name.to_sym].call(url, &block)
29
+ else
30
+ spiders[name.to_sym].call(&block)
31
+ end
32
+ end
33
+ end
@@ -4,21 +4,20 @@
4
4
  # Class representing a website defined by DSL
5
5
  #
6
6
  class Botz::Definition
7
- Output = ->(result) { STDOUT.puts(result.to_json) }
8
-
9
- class_attribute :name
10
7
  class_attribute :domain
11
8
  class_attribute :spiders, default: {}
12
9
  class_attribute :scrapers, default: {}
13
10
 
11
+ Output = ->(result) { STDOUT.puts(result.to_json) }
12
+
13
+ def output(&block)
14
+ remove_const(:Output)
15
+ const_set(:Output, block)
16
+ end
17
+
14
18
  # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
15
19
  class << self
16
- def output(&block)
17
- remove_const(:Output)
18
- const_set(:Output, block)
19
- end
20
-
21
- def scraper(name, as: :html, output: Output, &block)
20
+ def scraper(name, as: :html, &block)
22
21
  class_name = "#{name}_scraper".classify
23
22
  downloader = Botz::Downloader.new(as)
24
23
  binder_base = Botz.const_get "#{as}_scraper_macro".classify
@@ -29,10 +28,10 @@ class Botz::Definition
29
28
  define_singleton_method(:crawler_class) { crawler_class }
30
29
  define_singleton_method(:bind) do |url|
31
30
  downloader.call(url) do |resource|
32
- binder.new(scraper_class, resource, output)
31
+ binder.new(scraper_class, resource)
33
32
  end
34
33
  end
35
- define_singleton_method(:call) { |url| bind(url).save }
34
+ define_singleton_method(:call) { |url, &output| bind(url).call(&output) }
36
35
  end
37
36
  const_set(class_name, scraper_class)
38
37
  scrapers[name] = scraper_class
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # botz interface binding
5
+ #
6
+ class Botz::DefinitionFile
7
+ attr_reader :path
8
+ attr_reader :definition
9
+ delegate :spiders, :scrapers, :Output, to: :definition
10
+
11
+ def self.open(filepath)
12
+ object = new(filepath)
13
+ object.eval_definition
14
+ object
15
+ end
16
+
17
+ # rubocop:disable Security/Eval
18
+ def eval_definition
19
+ @definition = eval(File.open(path).read)
20
+ end
21
+ # rubocop:enable Security/Eval
22
+
23
+ def shell
24
+ @shell ||= Botz::Shell.new(self)
25
+ end
26
+
27
+ def console
28
+ require 'pry'
29
+ Pry.start(Botz::Console.new(self))
30
+ end
31
+
32
+ private
33
+
34
+ def initialize(path)
35
+ @path = path
36
+ end
37
+ end
@@ -19,9 +19,8 @@ class Botz::HtmlScraperMacro
19
19
  attr_reader :scraper_class
20
20
  attr_reader :url
21
21
  attr_reader :html
22
- attr_reader :writer
23
22
 
24
- def initialize(scraper_class, resource, writer)
23
+ def initialize(scraper_class, resource)
25
24
  @scraper_class = scraper_class
26
25
  @url = resource.uri
27
26
  @html = resource
@@ -45,10 +44,10 @@ class Botz::HtmlScraperMacro
45
44
  self.class.field_names.map { |field| [field, send(field)] }.to_h.merge(timestamps)
46
45
  end
47
46
 
48
- def save
47
+ def call
49
48
  fail Error.new(scraper_class, url, errors) if invalid?
50
49
 
51
- writer.call(to_h)
50
+ yield(to_h)
52
51
  end
53
52
 
54
53
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
data/lib/botz/shell.rb CHANGED
@@ -1,15 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  #
4
- # botz shell access interface
4
+ # botz shell interface
5
5
  #
6
6
  class Botz::Shell
7
- include ActiveModel::Model
8
- include ActiveModel::Attributes
7
+ attr_reader :definition_file
8
+ delegate :spiders, :scrapers, to: :definition_file
9
9
 
10
- attribute :filepath
11
- attribute :definition
12
- delegate :domain, :spiders, :scrapers, to: :definition
10
+ def initialize(definition_file)
11
+ @definition_file = definition_file
12
+ end
13
13
 
14
14
  # rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
15
15
  def scraper(name)
@@ -17,7 +17,7 @@ class Botz::Shell
17
17
  while line = STDIN.gets
18
18
  url = line.strip
19
19
  begin
20
- command.call(url)
20
+ command.call(url, &definition_file.Output)
21
21
  rescue
22
22
  STDERR.puts "ERROR #{command} #{url}"
23
23
  end
@@ -40,10 +40,10 @@ class Botz::Shell
40
40
  def function
41
41
  print <<~SHELL
42
42
  function spider() {
43
- botz #{filepath} spider $1
43
+ botz spider #{definition_file.path} $1
44
44
  }
45
45
  function scraper() {
46
- botz #{filepath} scraper $1
46
+ botz scraper #{definition_file.path} $1
47
47
  }
48
48
  SHELL
49
49
  end
data/lib/botz/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Botz
4
- VERSION = '0.2.0'
4
+ VERSION = '0.3.0'
5
5
  end
data/lib/botz.rb CHANGED
@@ -11,27 +11,31 @@ require 'mechanize'
11
11
  module Botz
12
12
  extend ActiveSupport::Autoload
13
13
  autoload :Shell
14
- autoload :Command
14
+ autoload :Console
15
15
  autoload :Definition
16
+ autoload :DefinitionFile
16
17
  autoload :Spider
17
18
  autoload :Downloader
18
19
  autoload :HtmlScraperMacro
19
20
 
20
- def self.open(filepath)
21
- remove_const(:Crawler) if const_defined?(:Crawler)
22
- const_set(:Crawler, Module.new)
21
+ const_set(:Crawler, Module.new) unless const_defined?(:Crawler)
23
22
 
24
- # rubocop:disable Security/Eval
25
- definition = eval(File.open(filepath).read)
26
- # rubocop:enable Security/Eval
23
+ def self.console
24
+ require 'pry'
25
+ Pry.start(Botz::Console.new)
26
+ end
27
27
 
28
- Botz::Command.new(filepath: filepath, definition: definition)
28
+ def self.open(filepath)
29
+ ::Botz::DefinitionFile.open(filepath)
29
30
  end
30
31
 
31
32
  def self.define(name, domain:, &block)
32
- crawler_definition = Class.new(Botz::Definition, &block)
33
+ crawler_definition = Class.new(::Botz::Definition, &block)
33
34
  crawler_definition.domain = domain
34
- Crawler.const_set(name.to_s.camelize, crawler_definition)
35
+ crawler_class_name = name.to_s.camelize
36
+
37
+ Crawler.class_eval { remove_const(crawler_class_name) } if Crawler.const_defined?(crawler_class_name)
38
+ Crawler.const_set(crawler_class_name, crawler_definition)
35
39
  crawler_definition
36
40
  end
37
41
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: botz
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - aileron
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-28 00:00:00.000000000 Z
11
+ date: 2019-06-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: activemodel
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -134,10 +148,11 @@ files:
134
148
  - botz.gemspec
135
149
  - exe/botz
136
150
  - lib/botz.rb
137
- - lib/botz/command.rb
151
+ - lib/botz/console.rb
152
+ - lib/botz/definition.rb
153
+ - lib/botz/definition_file.rb
138
154
  - lib/botz/downloader.rb
139
155
  - lib/botz/html_scraper_macro.rb
140
- - lib/botz/macro.rb
141
156
  - lib/botz/shell.rb
142
157
  - lib/botz/spider.rb
143
158
  - lib/botz/version.rb
data/lib/botz/command.rb DELETED
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class Botz::Command
4
- attr_reader :definition
5
- attr_reader :shell
6
-
7
- def initialize(definition)
8
- @definition = definition
9
- @shell = Botz::Shell.new(definition: definition)
10
- end
11
- end