botz 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e8f0f5bd0c1efc41fefee5be1ad905144c6c2af0e20c7163ec1ddac75b59b03c
4
- data.tar.gz: d1fd0bf8a10ae82a5ca51a81993143964ff20911573785a8cb5d1f3e8facbed5
3
+ metadata.gz: 9fc66e77b306d2024ecaf98a7de9cb68a4489a42d0ccd7435245b4d6a385161b
4
+ data.tar.gz: f42f5b708de3b5e52ea29947a5d08cc31bcb0dadd89dcaa69b71de9b5ad001a9
5
5
  SHA512:
6
- metadata.gz: 582acc3d2f94b021943d517ab62c77d42f759d8c197c4728e2a49ce4c4127f105ce0cbcb3a822e5fb7aefe8d90e8d6234d682e855cff5209d1c12d8f78f31d8d
7
- data.tar.gz: 43efe75756b292eb80b8acce890fd37f15845cc9868cca6c46946ab0008d947afe9abbbef23bc71256fb9c4c87ff79a809d81f8f6fcd178ee0add6117fa9290f
6
+ metadata.gz: 4e8c1e31edde612bcf993fd470fc4249bfd236ffd2e3640cca22f1098d22a5c7b0bfa9ade04f0263c94df5faa9901b4743dcc5cc7f343512c9b6e079711a78f4
7
+ data.tar.gz: 52e4dbb1fe4ac0f9b07654d012a51e3a17b86a8279fe99eb34bb445fbc9845589b8adce472a92f636609c938ef486a9b86f81f31a5831f7315afdabafca5a492
data/Gemfile.lock CHANGED
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- botz (0.1.0)
4
+ botz (0.2.0)
5
5
  activemodel (~> 5.2)
6
6
  activesupport (~> 5.2)
7
7
  mechanize
8
+ pry
8
9
 
9
10
  GEM
10
11
  remote: https://rubygems.org/
data/bin/console CHANGED
@@ -10,9 +10,9 @@ require 'botz'
10
10
  # (If you use this, don't forget to add pry to your Gemfile!)
11
11
  require 'pry'
12
12
  def reload!
13
- files = $LOADED_FEATURES.select { |feat| feat =~ %r{/gem_name/} }
14
- files.each { |file| load file }
15
- load 'lib/botz.rb'
13
+ ActiveSupport::Dependencies.clear
14
+ ActiveSupport::DescendantsTracker.clear
15
+ ActiveSupport::Reloader.reload!
16
16
  end
17
17
  Pry.start
18
18
 
data/botz.gemspec CHANGED
@@ -29,6 +29,7 @@ Gem::Specification.new do |spec|
29
29
  spec.add_development_dependency 'rake', '~> 10.0'
30
30
  spec.add_development_dependency 'rspec', '~> 3.0'
31
31
 
32
+ spec.add_runtime_dependency 'pry'
32
33
  spec.add_runtime_dependency 'activemodel', '~> 5.2'
33
34
  spec.add_runtime_dependency 'activesupport', '~> 5.2'
34
35
  spec.add_runtime_dependency 'mechanize'
data/exe/botz CHANGED
@@ -2,9 +2,15 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  require 'botz'
5
- shell = Botz.open(ARGV[0]).shell
6
- case ARGV[1]&.to_sym
7
- when :spider then shell.spider(ARGV[2])
8
- when :scraper then shell.scraper(ARGV[2])
9
- when :shell then shell.function
5
+
6
+ case ARGV[0]&.to_sym
7
+ when :spider then Botz.open(ARGV[1]).shell.spider(ARGV[2])
8
+ when :scraper then Botz.open(ARGV[1]).shell.scraper(ARGV[2])
9
+ when :shell then Botz.open(ARGV[1]).shell.function
10
+ when :console
11
+ if ARGV[1].blank?
12
+ Botz.console
13
+ else
14
+ Botz.open(ARGV[1]).console
15
+ end
10
16
  end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # botz console
5
+ #
6
+ class Botz::Console
7
+ attr_reader :definition_file
8
+ delegate :spiders, :scrapers, to: :definition_file
9
+
10
+ def initialize(definition_file = nil)
11
+ @definition_file = definition_file
12
+ end
13
+
14
+ def open(filepath)
15
+ @definition_file = Botz::DefinitionFile.open(filepath)
16
+ end
17
+
18
+ def reload!
19
+ @definition_file&.eval_definition
20
+ end
21
+
22
+ def scraper(name, url, &block)
23
+ scrapers[name.to_sym].call(url, &block)
24
+ end
25
+
26
+ def spider(name, url = nil, &block)
27
+ if url
28
+ spiders[name.to_sym].call(url, &block)
29
+ else
30
+ spiders[name.to_sym].call(&block)
31
+ end
32
+ end
33
+ end
@@ -4,21 +4,20 @@
4
4
  # Class representing a website defined by DSL
5
5
  #
6
6
  class Botz::Definition
7
- Output = ->(result) { STDOUT.puts(result.to_json) }
8
-
9
- class_attribute :name
10
7
  class_attribute :domain
11
8
  class_attribute :spiders, default: {}
12
9
  class_attribute :scrapers, default: {}
13
10
 
11
+ Output = ->(result) { STDOUT.puts(result.to_json) }
12
+
13
+ def output(&block)
14
+ remove_const(:Output)
15
+ const_set(:Output, block)
16
+ end
17
+
14
18
  # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
15
19
  class << self
16
- def output(&block)
17
- remove_const(:Output)
18
- const_set(:Output, block)
19
- end
20
-
21
- def scraper(name, as: :html, output: Output, &block)
20
+ def scraper(name, as: :html, &block)
22
21
  class_name = "#{name}_scraper".classify
23
22
  downloader = Botz::Downloader.new(as)
24
23
  binder_base = Botz.const_get "#{as}_scraper_macro".classify
@@ -29,10 +28,10 @@ class Botz::Definition
29
28
  define_singleton_method(:crawler_class) { crawler_class }
30
29
  define_singleton_method(:bind) do |url|
31
30
  downloader.call(url) do |resource|
32
- binder.new(scraper_class, resource, output)
31
+ binder.new(scraper_class, resource)
33
32
  end
34
33
  end
35
- define_singleton_method(:call) { |url| bind(url).save }
34
+ define_singleton_method(:call) { |url, &output| bind(url).call(&output) }
36
35
  end
37
36
  const_set(class_name, scraper_class)
38
37
  scrapers[name] = scraper_class
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # botz interface binding
5
+ #
6
+ class Botz::DefinitionFile
7
+ attr_reader :path
8
+ attr_reader :definition
9
+ delegate :spiders, :scrapers, :Output, to: :definition
10
+
11
+ def self.open(filepath)
12
+ object = new(filepath)
13
+ object.eval_definition
14
+ object
15
+ end
16
+
17
+ # rubocop:disable Security/Eval
18
+ def eval_definition
19
+ @definition = eval(File.open(path).read)
20
+ end
21
+ # rubocop:enable Security/Eval
22
+
23
+ def shell
24
+ @shell ||= Botz::Shell.new(self)
25
+ end
26
+
27
+ def console
28
+ require 'pry'
29
+ Pry.start(Botz::Console.new(self))
30
+ end
31
+
32
+ private
33
+
34
+ def initialize(path)
35
+ @path = path
36
+ end
37
+ end
@@ -19,9 +19,8 @@ class Botz::HtmlScraperMacro
19
19
  attr_reader :scraper_class
20
20
  attr_reader :url
21
21
  attr_reader :html
22
- attr_reader :writer
23
22
 
24
- def initialize(scraper_class, resource, writer)
23
+ def initialize(scraper_class, resource)
25
24
  @scraper_class = scraper_class
26
25
  @url = resource.uri
27
26
  @html = resource
@@ -45,10 +44,10 @@ class Botz::HtmlScraperMacro
45
44
  self.class.field_names.map { |field| [field, send(field)] }.to_h.merge(timestamps)
46
45
  end
47
46
 
48
- def save
47
+ def call
49
48
  fail Error.new(scraper_class, url, errors) if invalid?
50
49
 
51
- writer.call(to_h)
50
+ yield(to_h)
52
51
  end
53
52
 
54
53
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
data/lib/botz/shell.rb CHANGED
@@ -1,15 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  #
4
- # botz shell access interface
4
+ # botz shell interface
5
5
  #
6
6
  class Botz::Shell
7
- include ActiveModel::Model
8
- include ActiveModel::Attributes
7
+ attr_reader :definition_file
8
+ delegate :spiders, :scrapers, to: :definition_file
9
9
 
10
- attribute :filepath
11
- attribute :definition
12
- delegate :domain, :spiders, :scrapers, to: :definition
10
+ def initialize(definition_file)
11
+ @definition_file = definition_file
12
+ end
13
13
 
14
14
  # rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
15
15
  def scraper(name)
@@ -17,7 +17,7 @@ class Botz::Shell
17
17
  while line = STDIN.gets
18
18
  url = line.strip
19
19
  begin
20
- command.call(url)
20
+ command.call(url, &definition_file.Output)
21
21
  rescue
22
22
  STDERR.puts "ERROR #{command} #{url}"
23
23
  end
@@ -40,10 +40,10 @@ class Botz::Shell
40
40
  def function
41
41
  print <<~SHELL
42
42
  function spider() {
43
- botz #{filepath} spider $1
43
+ botz spider #{definition_file.path} $1
44
44
  }
45
45
  function scraper() {
46
- botz #{filepath} scraper $1
46
+ botz scraper #{definition_file.path} $1
47
47
  }
48
48
  SHELL
49
49
  end
data/lib/botz/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Botz
4
- VERSION = '0.2.0'
4
+ VERSION = '0.3.0'
5
5
  end
data/lib/botz.rb CHANGED
@@ -11,27 +11,31 @@ require 'mechanize'
11
11
  module Botz
12
12
  extend ActiveSupport::Autoload
13
13
  autoload :Shell
14
- autoload :Command
14
+ autoload :Console
15
15
  autoload :Definition
16
+ autoload :DefinitionFile
16
17
  autoload :Spider
17
18
  autoload :Downloader
18
19
  autoload :HtmlScraperMacro
19
20
 
20
- def self.open(filepath)
21
- remove_const(:Crawler) if const_defined?(:Crawler)
22
- const_set(:Crawler, Module.new)
21
+ const_set(:Crawler, Module.new) unless const_defined?(:Crawler)
23
22
 
24
- # rubocop:disable Security/Eval
25
- definition = eval(File.open(filepath).read)
26
- # rubocop:enable Security/Eval
23
+ def self.console
24
+ require 'pry'
25
+ Pry.start(Botz::Console.new)
26
+ end
27
27
 
28
- Botz::Command.new(filepath: filepath, definition: definition)
28
+ def self.open(filepath)
29
+ ::Botz::DefinitionFile.open(filepath)
29
30
  end
30
31
 
31
32
  def self.define(name, domain:, &block)
32
- crawler_definition = Class.new(Botz::Definition, &block)
33
+ crawler_definition = Class.new(::Botz::Definition, &block)
33
34
  crawler_definition.domain = domain
34
- Crawler.const_set(name.to_s.camelize, crawler_definition)
35
+ crawler_class_name = name.to_s.camelize
36
+
37
+ Crawler.class_eval { remove_const(crawler_class_name) } if Crawler.const_defined?(crawler_class_name)
38
+ Crawler.const_set(crawler_class_name, crawler_definition)
35
39
  crawler_definition
36
40
  end
37
41
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: botz
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - aileron
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-28 00:00:00.000000000 Z
11
+ date: 2019-06-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: activemodel
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -134,10 +148,11 @@ files:
134
148
  - botz.gemspec
135
149
  - exe/botz
136
150
  - lib/botz.rb
137
- - lib/botz/command.rb
151
+ - lib/botz/console.rb
152
+ - lib/botz/definition.rb
153
+ - lib/botz/definition_file.rb
138
154
  - lib/botz/downloader.rb
139
155
  - lib/botz/html_scraper_macro.rb
140
- - lib/botz/macro.rb
141
156
  - lib/botz/shell.rb
142
157
  - lib/botz/spider.rb
143
158
  - lib/botz/version.rb
data/lib/botz/command.rb DELETED
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class Botz::Command
4
- attr_reader :definition
5
- attr_reader :shell
6
-
7
- def initialize(definition)
8
- @definition = definition
9
- @shell = Botz::Shell.new(definition: definition)
10
- end
11
- end