botz 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -1
- data/bin/console +3 -3
- data/botz.gemspec +1 -0
- data/exe/botz +11 -5
- data/lib/botz/console.rb +33 -0
- data/lib/botz/{macro.rb → definition.rb} +10 -11
- data/lib/botz/definition_file.rb +37 -0
- data/lib/botz/html_scraper_macro.rb +3 -4
- data/lib/botz/shell.rb +9 -9
- data/lib/botz/version.rb +1 -1
- data/lib/botz.rb +14 -10
- metadata +19 -4
- data/lib/botz/command.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9fc66e77b306d2024ecaf98a7de9cb68a4489a42d0ccd7435245b4d6a385161b
|
4
|
+
data.tar.gz: f42f5b708de3b5e52ea29947a5d08cc31bcb0dadd89dcaa69b71de9b5ad001a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e8c1e31edde612bcf993fd470fc4249bfd236ffd2e3640cca22f1098d22a5c7b0bfa9ade04f0263c94df5faa9901b4743dcc5cc7f343512c9b6e079711a78f4
|
7
|
+
data.tar.gz: 52e4dbb1fe4ac0f9b07654d012a51e3a17b86a8279fe99eb34bb445fbc9845589b8adce472a92f636609c938ef486a9b86f81f31a5831f7315afdabafca5a492
|
data/Gemfile.lock
CHANGED
data/bin/console
CHANGED
@@ -10,9 +10,9 @@ require 'botz'
|
|
10
10
|
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
11
|
require 'pry'
|
12
12
|
def reload!
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ActiveSupport::Dependencies.clear
|
14
|
+
ActiveSupport::DescendantsTracker.clear
|
15
|
+
ActiveSupport::Reloader.reload!
|
16
16
|
end
|
17
17
|
Pry.start
|
18
18
|
|
data/botz.gemspec
CHANGED
@@ -29,6 +29,7 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.add_development_dependency 'rake', '~> 10.0'
|
30
30
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
31
31
|
|
32
|
+
spec.add_runtime_dependency 'pry'
|
32
33
|
spec.add_runtime_dependency 'activemodel', '~> 5.2'
|
33
34
|
spec.add_runtime_dependency 'activesupport', '~> 5.2'
|
34
35
|
spec.add_runtime_dependency 'mechanize'
|
data/exe/botz
CHANGED
@@ -2,9 +2,15 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
4
|
require 'botz'
|
5
|
-
|
6
|
-
case ARGV[
|
7
|
-
when :spider then shell.spider(ARGV[2])
|
8
|
-
when :scraper then shell.scraper(ARGV[2])
|
9
|
-
when :shell then shell.function
|
5
|
+
|
6
|
+
case ARGV[0]&.to_sym
|
7
|
+
when :spider then Botz.open(ARGV[1]).shell.spider(ARGV[2])
|
8
|
+
when :scraper then Botz.open(ARGV[1]).shell.scraper(ARGV[2])
|
9
|
+
when :shell then Botz.open(ARGV[1]).shell.function
|
10
|
+
when :console
|
11
|
+
if ARGV[1].blank?
|
12
|
+
Botz.console
|
13
|
+
else
|
14
|
+
Botz.open(ARGV[1]).console
|
15
|
+
end
|
10
16
|
end
|
data/lib/botz/console.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# botz console
|
5
|
+
#
|
6
|
+
class Botz::Console
|
7
|
+
attr_reader :definition_file
|
8
|
+
delegate :spiders, :scrapers, to: :definition_file
|
9
|
+
|
10
|
+
def initialize(definition_file = nil)
|
11
|
+
@definition_file = definition_file
|
12
|
+
end
|
13
|
+
|
14
|
+
def open(filepath)
|
15
|
+
@definition_file = Botz::DefinitionFile.open(filepath)
|
16
|
+
end
|
17
|
+
|
18
|
+
def reload!
|
19
|
+
@definition_file&.eval_definition
|
20
|
+
end
|
21
|
+
|
22
|
+
def scraper(name, url, &block)
|
23
|
+
scrapers[name.to_sym].call(url, &block)
|
24
|
+
end
|
25
|
+
|
26
|
+
def spider(name, url = nil, &block)
|
27
|
+
if url
|
28
|
+
spiders[name.to_sym].call(url, &block)
|
29
|
+
else
|
30
|
+
spiders[name.to_sym].call(&block)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -4,21 +4,20 @@
|
|
4
4
|
# Class representing a website defined by DSL
|
5
5
|
#
|
6
6
|
class Botz::Definition
|
7
|
-
Output = ->(result) { STDOUT.puts(result.to_json) }
|
8
|
-
|
9
|
-
class_attribute :name
|
10
7
|
class_attribute :domain
|
11
8
|
class_attribute :spiders, default: {}
|
12
9
|
class_attribute :scrapers, default: {}
|
13
10
|
|
11
|
+
Output = ->(result) { STDOUT.puts(result.to_json) }
|
12
|
+
|
13
|
+
def output(&block)
|
14
|
+
remove_const(:Output)
|
15
|
+
const_set(:Output, block)
|
16
|
+
end
|
17
|
+
|
14
18
|
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
15
19
|
class << self
|
16
|
-
def output(&block)
|
17
|
-
remove_const(:Output)
|
18
|
-
const_set(:Output, block)
|
19
|
-
end
|
20
|
-
|
21
|
-
def scraper(name, as: :html, output: Output, &block)
|
20
|
+
def scraper(name, as: :html, &block)
|
22
21
|
class_name = "#{name}_scraper".classify
|
23
22
|
downloader = Botz::Downloader.new(as)
|
24
23
|
binder_base = Botz.const_get "#{as}_scraper_macro".classify
|
@@ -29,10 +28,10 @@ class Botz::Definition
|
|
29
28
|
define_singleton_method(:crawler_class) { crawler_class }
|
30
29
|
define_singleton_method(:bind) do |url|
|
31
30
|
downloader.call(url) do |resource|
|
32
|
-
binder.new(scraper_class, resource
|
31
|
+
binder.new(scraper_class, resource)
|
33
32
|
end
|
34
33
|
end
|
35
|
-
define_singleton_method(:call) { |url| bind(url).
|
34
|
+
define_singleton_method(:call) { |url, &output| bind(url).call(&output) }
|
36
35
|
end
|
37
36
|
const_set(class_name, scraper_class)
|
38
37
|
scrapers[name] = scraper_class
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# botz interface binding
|
5
|
+
#
|
6
|
+
class Botz::DefinitionFile
|
7
|
+
attr_reader :path
|
8
|
+
attr_reader :definition
|
9
|
+
delegate :spiders, :scrapers, :Output, to: :definition
|
10
|
+
|
11
|
+
def self.open(filepath)
|
12
|
+
object = new(filepath)
|
13
|
+
object.eval_definition
|
14
|
+
object
|
15
|
+
end
|
16
|
+
|
17
|
+
# rubocop:disable Security/Eval
|
18
|
+
def eval_definition
|
19
|
+
@definition = eval(File.open(path).read)
|
20
|
+
end
|
21
|
+
# rubocop:enable Security/Eval
|
22
|
+
|
23
|
+
def shell
|
24
|
+
@shell ||= Botz::Shell.new(self)
|
25
|
+
end
|
26
|
+
|
27
|
+
def console
|
28
|
+
require 'pry'
|
29
|
+
Pry.start(Botz::Console.new(self))
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def initialize(path)
|
35
|
+
@path = path
|
36
|
+
end
|
37
|
+
end
|
@@ -19,9 +19,8 @@ class Botz::HtmlScraperMacro
|
|
19
19
|
attr_reader :scraper_class
|
20
20
|
attr_reader :url
|
21
21
|
attr_reader :html
|
22
|
-
attr_reader :writer
|
23
22
|
|
24
|
-
def initialize(scraper_class, resource
|
23
|
+
def initialize(scraper_class, resource)
|
25
24
|
@scraper_class = scraper_class
|
26
25
|
@url = resource.uri
|
27
26
|
@html = resource
|
@@ -45,10 +44,10 @@ class Botz::HtmlScraperMacro
|
|
45
44
|
self.class.field_names.map { |field| [field, send(field)] }.to_h.merge(timestamps)
|
46
45
|
end
|
47
46
|
|
48
|
-
def
|
47
|
+
def call
|
49
48
|
fail Error.new(scraper_class, url, errors) if invalid?
|
50
49
|
|
51
|
-
|
50
|
+
yield(to_h)
|
52
51
|
end
|
53
52
|
|
54
53
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
data/lib/botz/shell.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
#
|
4
|
-
# botz shell
|
4
|
+
# botz shell interface
|
5
5
|
#
|
6
6
|
class Botz::Shell
|
7
|
-
|
8
|
-
|
7
|
+
attr_reader :definition_file
|
8
|
+
delegate :spiders, :scrapers, to: :definition_file
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
def initialize(definition_file)
|
11
|
+
@definition_file = definition_file
|
12
|
+
end
|
13
13
|
|
14
14
|
# rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
|
15
15
|
def scraper(name)
|
@@ -17,7 +17,7 @@ class Botz::Shell
|
|
17
17
|
while line = STDIN.gets
|
18
18
|
url = line.strip
|
19
19
|
begin
|
20
|
-
command.call(url)
|
20
|
+
command.call(url, &definition_file.Output)
|
21
21
|
rescue
|
22
22
|
STDERR.puts "ERROR #{command} #{url}"
|
23
23
|
end
|
@@ -40,10 +40,10 @@ class Botz::Shell
|
|
40
40
|
def function
|
41
41
|
print <<~SHELL
|
42
42
|
function spider() {
|
43
|
-
botz #{
|
43
|
+
botz spider #{definition_file.path} $1
|
44
44
|
}
|
45
45
|
function scraper() {
|
46
|
-
botz #{
|
46
|
+
botz scraper #{definition_file.path} $1
|
47
47
|
}
|
48
48
|
SHELL
|
49
49
|
end
|
data/lib/botz/version.rb
CHANGED
data/lib/botz.rb
CHANGED
@@ -11,27 +11,31 @@ require 'mechanize'
|
|
11
11
|
module Botz
|
12
12
|
extend ActiveSupport::Autoload
|
13
13
|
autoload :Shell
|
14
|
-
autoload :Command
|
14
|
+
autoload :Console
|
15
15
|
autoload :Definition
|
16
|
+
autoload :DefinitionFile
|
16
17
|
autoload :Spider
|
17
18
|
autoload :Downloader
|
18
19
|
autoload :HtmlScraperMacro
|
19
20
|
|
20
|
-
|
21
|
-
remove_const(:Crawler) if const_defined?(:Crawler)
|
22
|
-
const_set(:Crawler, Module.new)
|
21
|
+
const_set(:Crawler, Module.new) unless const_defined?(:Crawler)
|
23
22
|
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
def self.console
|
24
|
+
require 'pry'
|
25
|
+
Pry.start(Botz::Console.new)
|
26
|
+
end
|
27
27
|
|
28
|
-
|
28
|
+
def self.open(filepath)
|
29
|
+
::Botz::DefinitionFile.open(filepath)
|
29
30
|
end
|
30
31
|
|
31
32
|
def self.define(name, domain:, &block)
|
32
|
-
crawler_definition = Class.new(Botz::Definition, &block)
|
33
|
+
crawler_definition = Class.new(::Botz::Definition, &block)
|
33
34
|
crawler_definition.domain = domain
|
34
|
-
|
35
|
+
crawler_class_name = name.to_s.camelize
|
36
|
+
|
37
|
+
Crawler.class_eval { remove_const(crawler_class_name) } if Crawler.const_defined?(crawler_class_name)
|
38
|
+
Crawler.const_set(crawler_class_name, crawler_definition)
|
35
39
|
crawler_definition
|
36
40
|
end
|
37
41
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: botz
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-06-
|
11
|
+
date: 2019-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: activemodel
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -134,10 +148,11 @@ files:
|
|
134
148
|
- botz.gemspec
|
135
149
|
- exe/botz
|
136
150
|
- lib/botz.rb
|
137
|
-
- lib/botz/command.rb
|
151
|
+
- lib/botz/console.rb
|
152
|
+
- lib/botz/definition.rb
|
153
|
+
- lib/botz/definition_file.rb
|
138
154
|
- lib/botz/downloader.rb
|
139
155
|
- lib/botz/html_scraper_macro.rb
|
140
|
-
- lib/botz/macro.rb
|
141
156
|
- lib/botz/shell.rb
|
142
157
|
- lib/botz/spider.rb
|
143
158
|
- lib/botz/version.rb
|