botz 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -1
- data/bin/console +3 -3
- data/botz.gemspec +1 -0
- data/exe/botz +11 -5
- data/lib/botz/console.rb +33 -0
- data/lib/botz/{macro.rb → definition.rb} +10 -11
- data/lib/botz/definition_file.rb +37 -0
- data/lib/botz/html_scraper_macro.rb +3 -4
- data/lib/botz/shell.rb +9 -9
- data/lib/botz/version.rb +1 -1
- data/lib/botz.rb +14 -10
- metadata +19 -4
- data/lib/botz/command.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9fc66e77b306d2024ecaf98a7de9cb68a4489a42d0ccd7435245b4d6a385161b
|
4
|
+
data.tar.gz: f42f5b708de3b5e52ea29947a5d08cc31bcb0dadd89dcaa69b71de9b5ad001a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e8c1e31edde612bcf993fd470fc4249bfd236ffd2e3640cca22f1098d22a5c7b0bfa9ade04f0263c94df5faa9901b4743dcc5cc7f343512c9b6e079711a78f4
|
7
|
+
data.tar.gz: 52e4dbb1fe4ac0f9b07654d012a51e3a17b86a8279fe99eb34bb445fbc9845589b8adce472a92f636609c938ef486a9b86f81f31a5831f7315afdabafca5a492
|
data/Gemfile.lock
CHANGED
data/bin/console
CHANGED
@@ -10,9 +10,9 @@ require 'botz'
|
|
10
10
|
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
11
|
require 'pry'
|
12
12
|
def reload!
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ActiveSupport::Dependencies.clear
|
14
|
+
ActiveSupport::DescendantsTracker.clear
|
15
|
+
ActiveSupport::Reloader.reload!
|
16
16
|
end
|
17
17
|
Pry.start
|
18
18
|
|
data/botz.gemspec
CHANGED
@@ -29,6 +29,7 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.add_development_dependency 'rake', '~> 10.0'
|
30
30
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
31
31
|
|
32
|
+
spec.add_runtime_dependency 'pry'
|
32
33
|
spec.add_runtime_dependency 'activemodel', '~> 5.2'
|
33
34
|
spec.add_runtime_dependency 'activesupport', '~> 5.2'
|
34
35
|
spec.add_runtime_dependency 'mechanize'
|
data/exe/botz
CHANGED
@@ -2,9 +2,15 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
4
|
require 'botz'
|
5
|
-
|
6
|
-
case ARGV[
|
7
|
-
when :spider then shell.spider(ARGV[2])
|
8
|
-
when :scraper then shell.scraper(ARGV[2])
|
9
|
-
when :shell then shell.function
|
5
|
+
|
6
|
+
case ARGV[0]&.to_sym
|
7
|
+
when :spider then Botz.open(ARGV[1]).shell.spider(ARGV[2])
|
8
|
+
when :scraper then Botz.open(ARGV[1]).shell.scraper(ARGV[2])
|
9
|
+
when :shell then Botz.open(ARGV[1]).shell.function
|
10
|
+
when :console
|
11
|
+
if ARGV[1].blank?
|
12
|
+
Botz.console
|
13
|
+
else
|
14
|
+
Botz.open(ARGV[1]).console
|
15
|
+
end
|
10
16
|
end
|
data/lib/botz/console.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# botz console
|
5
|
+
#
|
6
|
+
class Botz::Console
|
7
|
+
attr_reader :definition_file
|
8
|
+
delegate :spiders, :scrapers, to: :definition_file
|
9
|
+
|
10
|
+
def initialize(definition_file = nil)
|
11
|
+
@definition_file = definition_file
|
12
|
+
end
|
13
|
+
|
14
|
+
def open(filepath)
|
15
|
+
@definition_file = Botz::DefinitionFile.open(filepath)
|
16
|
+
end
|
17
|
+
|
18
|
+
def reload!
|
19
|
+
@definition_file&.eval_definition
|
20
|
+
end
|
21
|
+
|
22
|
+
def scraper(name, url, &block)
|
23
|
+
scrapers[name.to_sym].call(url, &block)
|
24
|
+
end
|
25
|
+
|
26
|
+
def spider(name, url = nil, &block)
|
27
|
+
if url
|
28
|
+
spiders[name.to_sym].call(url, &block)
|
29
|
+
else
|
30
|
+
spiders[name.to_sym].call(&block)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -4,21 +4,20 @@
|
|
4
4
|
# Class representing a website defined by DSL
|
5
5
|
#
|
6
6
|
class Botz::Definition
|
7
|
-
Output = ->(result) { STDOUT.puts(result.to_json) }
|
8
|
-
|
9
|
-
class_attribute :name
|
10
7
|
class_attribute :domain
|
11
8
|
class_attribute :spiders, default: {}
|
12
9
|
class_attribute :scrapers, default: {}
|
13
10
|
|
11
|
+
Output = ->(result) { STDOUT.puts(result.to_json) }
|
12
|
+
|
13
|
+
def output(&block)
|
14
|
+
remove_const(:Output)
|
15
|
+
const_set(:Output, block)
|
16
|
+
end
|
17
|
+
|
14
18
|
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
15
19
|
class << self
|
16
|
-
def output(&block)
|
17
|
-
remove_const(:Output)
|
18
|
-
const_set(:Output, block)
|
19
|
-
end
|
20
|
-
|
21
|
-
def scraper(name, as: :html, output: Output, &block)
|
20
|
+
def scraper(name, as: :html, &block)
|
22
21
|
class_name = "#{name}_scraper".classify
|
23
22
|
downloader = Botz::Downloader.new(as)
|
24
23
|
binder_base = Botz.const_get "#{as}_scraper_macro".classify
|
@@ -29,10 +28,10 @@ class Botz::Definition
|
|
29
28
|
define_singleton_method(:crawler_class) { crawler_class }
|
30
29
|
define_singleton_method(:bind) do |url|
|
31
30
|
downloader.call(url) do |resource|
|
32
|
-
binder.new(scraper_class, resource
|
31
|
+
binder.new(scraper_class, resource)
|
33
32
|
end
|
34
33
|
end
|
35
|
-
define_singleton_method(:call) { |url| bind(url).
|
34
|
+
define_singleton_method(:call) { |url, &output| bind(url).call(&output) }
|
36
35
|
end
|
37
36
|
const_set(class_name, scraper_class)
|
38
37
|
scrapers[name] = scraper_class
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# botz interface binding
|
5
|
+
#
|
6
|
+
class Botz::DefinitionFile
|
7
|
+
attr_reader :path
|
8
|
+
attr_reader :definition
|
9
|
+
delegate :spiders, :scrapers, :Output, to: :definition
|
10
|
+
|
11
|
+
def self.open(filepath)
|
12
|
+
object = new(filepath)
|
13
|
+
object.eval_definition
|
14
|
+
object
|
15
|
+
end
|
16
|
+
|
17
|
+
# rubocop:disable Security/Eval
|
18
|
+
def eval_definition
|
19
|
+
@definition = eval(File.open(path).read)
|
20
|
+
end
|
21
|
+
# rubocop:enable Security/Eval
|
22
|
+
|
23
|
+
def shell
|
24
|
+
@shell ||= Botz::Shell.new(self)
|
25
|
+
end
|
26
|
+
|
27
|
+
def console
|
28
|
+
require 'pry'
|
29
|
+
Pry.start(Botz::Console.new(self))
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def initialize(path)
|
35
|
+
@path = path
|
36
|
+
end
|
37
|
+
end
|
@@ -19,9 +19,8 @@ class Botz::HtmlScraperMacro
|
|
19
19
|
attr_reader :scraper_class
|
20
20
|
attr_reader :url
|
21
21
|
attr_reader :html
|
22
|
-
attr_reader :writer
|
23
22
|
|
24
|
-
def initialize(scraper_class, resource
|
23
|
+
def initialize(scraper_class, resource)
|
25
24
|
@scraper_class = scraper_class
|
26
25
|
@url = resource.uri
|
27
26
|
@html = resource
|
@@ -45,10 +44,10 @@ class Botz::HtmlScraperMacro
|
|
45
44
|
self.class.field_names.map { |field| [field, send(field)] }.to_h.merge(timestamps)
|
46
45
|
end
|
47
46
|
|
48
|
-
def
|
47
|
+
def call
|
49
48
|
fail Error.new(scraper_class, url, errors) if invalid?
|
50
49
|
|
51
|
-
|
50
|
+
yield(to_h)
|
52
51
|
end
|
53
52
|
|
54
53
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
data/lib/botz/shell.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
#
|
4
|
-
# botz shell
|
4
|
+
# botz shell interface
|
5
5
|
#
|
6
6
|
class Botz::Shell
|
7
|
-
|
8
|
-
|
7
|
+
attr_reader :definition_file
|
8
|
+
delegate :spiders, :scrapers, to: :definition_file
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
def initialize(definition_file)
|
11
|
+
@definition_file = definition_file
|
12
|
+
end
|
13
13
|
|
14
14
|
# rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
|
15
15
|
def scraper(name)
|
@@ -17,7 +17,7 @@ class Botz::Shell
|
|
17
17
|
while line = STDIN.gets
|
18
18
|
url = line.strip
|
19
19
|
begin
|
20
|
-
command.call(url)
|
20
|
+
command.call(url, &definition_file.Output)
|
21
21
|
rescue
|
22
22
|
STDERR.puts "ERROR #{command} #{url}"
|
23
23
|
end
|
@@ -40,10 +40,10 @@ class Botz::Shell
|
|
40
40
|
def function
|
41
41
|
print <<~SHELL
|
42
42
|
function spider() {
|
43
|
-
botz #{
|
43
|
+
botz spider #{definition_file.path} $1
|
44
44
|
}
|
45
45
|
function scraper() {
|
46
|
-
botz #{
|
46
|
+
botz scraper #{definition_file.path} $1
|
47
47
|
}
|
48
48
|
SHELL
|
49
49
|
end
|
data/lib/botz/version.rb
CHANGED
data/lib/botz.rb
CHANGED
@@ -11,27 +11,31 @@ require 'mechanize'
|
|
11
11
|
module Botz
|
12
12
|
extend ActiveSupport::Autoload
|
13
13
|
autoload :Shell
|
14
|
-
autoload :
|
14
|
+
autoload :Console
|
15
15
|
autoload :Definition
|
16
|
+
autoload :DefinitionFile
|
16
17
|
autoload :Spider
|
17
18
|
autoload :Downloader
|
18
19
|
autoload :HtmlScraperMacro
|
19
20
|
|
20
|
-
|
21
|
-
remove_const(:Crawler) if const_defined?(:Crawler)
|
22
|
-
const_set(:Crawler, Module.new)
|
21
|
+
const_set(:Crawler, Module.new) unless const_defined?(:Crawler)
|
23
22
|
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
def self.console
|
24
|
+
require 'pry'
|
25
|
+
Pry.start(Botz::Console.new)
|
26
|
+
end
|
27
27
|
|
28
|
-
|
28
|
+
def self.open(filepath)
|
29
|
+
::Botz::DefinitionFile.open(filepath)
|
29
30
|
end
|
30
31
|
|
31
32
|
def self.define(name, domain:, &block)
|
32
|
-
crawler_definition = Class.new(Botz::Definition, &block)
|
33
|
+
crawler_definition = Class.new(::Botz::Definition, &block)
|
33
34
|
crawler_definition.domain = domain
|
34
|
-
|
35
|
+
crawler_class_name = name.to_s.camelize
|
36
|
+
|
37
|
+
Crawler.class_eval { remove_const(crawler_class_name) } if Crawler.const_defined?(crawler_class_name)
|
38
|
+
Crawler.const_set(crawler_class_name, crawler_definition)
|
35
39
|
crawler_definition
|
36
40
|
end
|
37
41
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: botz
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-06-
|
11
|
+
date: 2019-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: activemodel
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -134,10 +148,11 @@ files:
|
|
134
148
|
- botz.gemspec
|
135
149
|
- exe/botz
|
136
150
|
- lib/botz.rb
|
137
|
-
- lib/botz/command.rb
|
151
|
+
- lib/botz/console.rb
|
152
|
+
- lib/botz/definition.rb
|
153
|
+
- lib/botz/definition_file.rb
|
138
154
|
- lib/botz/downloader.rb
|
139
155
|
- lib/botz/html_scraper_macro.rb
|
140
|
-
- lib/botz/macro.rb
|
141
156
|
- lib/botz/shell.rb
|
142
157
|
- lib/botz/spider.rb
|
143
158
|
- lib/botz/version.rb
|