botz 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/botz +5 -12
- data/lib/botz/command.rb +5 -39
- data/lib/botz/macro.rb +1 -2
- data/lib/botz/shell.rb +50 -0
- data/lib/botz/version.rb +1 -1
- data/lib/botz.rb +8 -9
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8f0f5bd0c1efc41fefee5be1ad905144c6c2af0e20c7163ec1ddac75b59b03c
|
4
|
+
data.tar.gz: d1fd0bf8a10ae82a5ca51a81993143964ff20911573785a8cb5d1f3e8facbed5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 582acc3d2f94b021943d517ab62c77d42f759d8c197c4728e2a49ce4c4127f105ce0cbcb3a822e5fb7aefe8d90e8d6234d682e855cff5209d1c12d8f78f31d8d
|
7
|
+
data.tar.gz: 43efe75756b292eb80b8acce890fd37f15845cc9868cca6c46946ab0008d947afe9abbbef23bc71256fb9c4c87ff79a809d81f8f6fcd178ee0add6117fa9290f
|
data/exe/botz
CHANGED
@@ -2,16 +2,9 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
4
|
require 'botz'
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
function spider() {
|
11
|
-
botz #{ARGV[0]} spider $1
|
12
|
-
}
|
13
|
-
function scraper() {
|
14
|
-
botz #{ARGV[0]} scraper $1
|
15
|
-
}
|
16
|
-
SHELL
|
5
|
+
shell = Botz.open(ARGV[0]).shell
|
6
|
+
case ARGV[1]&.to_sym
|
7
|
+
when :spider then shell.spider(ARGV[2])
|
8
|
+
when :scraper then shell.scraper(ARGV[2])
|
9
|
+
when :shell then shell.function
|
17
10
|
end
|
data/lib/botz/command.rb
CHANGED
@@ -1,45 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
#
|
4
|
-
# Class representing a website defined by DSL
|
5
|
-
#
|
6
3
|
class Botz::Command
|
7
|
-
|
4
|
+
attr_reader :definition
|
5
|
+
attr_reader :shell
|
8
6
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
def call(category, name)
|
13
|
-
case category.to_sym
|
14
|
-
when :spider then spider(name)
|
15
|
-
when :scraper then scraper(name)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
# rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
|
20
|
-
def scraper(name)
|
21
|
-
command = scrapers[name.to_sym]
|
22
|
-
while line = STDIN.gets
|
23
|
-
url = line.strip
|
24
|
-
begin
|
25
|
-
command.call(url)
|
26
|
-
rescue
|
27
|
-
STDERR.puts "ERROR #{command} #{url}"
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
# rubocop:enable Lint/AssignmentInCondition, Style/RescueStandardError
|
32
|
-
|
33
|
-
def spider(name)
|
34
|
-
command = spiders[name.to_sym]
|
35
|
-
if File.pipe?(STDIN)
|
36
|
-
STDIN.each_line do |line|
|
37
|
-
start_url = line.strip
|
38
|
-
command.call(start_url) { |url| puts url }
|
39
|
-
end
|
40
|
-
else
|
41
|
-
command.call { |url| puts url }
|
42
|
-
end
|
43
|
-
end
|
7
|
+
def initialize(definition)
|
8
|
+
@definition = definition
|
9
|
+
@shell = Botz::Shell.new(definition: definition)
|
44
10
|
end
|
45
11
|
end
|
data/lib/botz/macro.rb
CHANGED
@@ -3,12 +3,11 @@
|
|
3
3
|
#
|
4
4
|
# Class representing a website defined by DSL
|
5
5
|
#
|
6
|
-
class Botz::Macro
|
6
|
+
class Botz::Definition
|
7
7
|
Output = ->(result) { STDOUT.puts(result.to_json) }
|
8
8
|
|
9
9
|
class_attribute :name
|
10
10
|
class_attribute :domain
|
11
|
-
class_attribute :command
|
12
11
|
class_attribute :spiders, default: {}
|
13
12
|
class_attribute :scrapers, default: {}
|
14
13
|
|
data/lib/botz/shell.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# botz shell access interface
|
5
|
+
#
|
6
|
+
class Botz::Shell
|
7
|
+
include ActiveModel::Model
|
8
|
+
include ActiveModel::Attributes
|
9
|
+
|
10
|
+
attribute :filepath
|
11
|
+
attribute :definition
|
12
|
+
delegate :domain, :spiders, :scrapers, to: :definition
|
13
|
+
|
14
|
+
# rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
|
15
|
+
def scraper(name)
|
16
|
+
command = scrapers[name.to_sym]
|
17
|
+
while line = STDIN.gets
|
18
|
+
url = line.strip
|
19
|
+
begin
|
20
|
+
command.call(url)
|
21
|
+
rescue
|
22
|
+
STDERR.puts "ERROR #{command} #{url}"
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
# rubocop:enable Lint/AssignmentInCondition, Style/RescueStandardError
|
27
|
+
|
28
|
+
def spider(name)
|
29
|
+
command = spiders[name.to_sym]
|
30
|
+
if File.pipe?(STDIN)
|
31
|
+
STDIN.each_line do |line|
|
32
|
+
start_url = line.strip
|
33
|
+
command.call(start_url) { |url| puts url }
|
34
|
+
end
|
35
|
+
else
|
36
|
+
command.call { |url| puts url }
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def function
|
41
|
+
print <<~SHELL
|
42
|
+
function spider() {
|
43
|
+
botz #{filepath} spider $1
|
44
|
+
}
|
45
|
+
function scraper() {
|
46
|
+
botz #{filepath} scraper $1
|
47
|
+
}
|
48
|
+
SHELL
|
49
|
+
end
|
50
|
+
end
|
data/lib/botz/version.rb
CHANGED
data/lib/botz.rb
CHANGED
@@ -10,8 +10,9 @@ require 'mechanize'
|
|
10
10
|
#
|
11
11
|
module Botz
|
12
12
|
extend ActiveSupport::Autoload
|
13
|
+
autoload :Shell
|
13
14
|
autoload :Command
|
14
|
-
autoload :Macro
|
15
|
+
autoload :Definition
|
15
16
|
autoload :Spider
|
16
17
|
autoload :Downloader
|
17
18
|
autoload :HtmlScraperMacro
|
@@ -19,20 +20,18 @@ module Botz
|
|
19
20
|
def self.open(filepath)
|
20
21
|
remove_const(:Crawler) if const_defined?(:Crawler)
|
21
22
|
const_set(:Crawler, Module.new)
|
23
|
+
|
22
24
|
# rubocop:disable Security/Eval
|
23
|
-
eval(File.open(filepath).read)
|
25
|
+
definition = eval(File.open(filepath).read)
|
24
26
|
# rubocop:enable Security/Eval
|
27
|
+
|
28
|
+
Botz::Command.new(filepath: filepath, definition: definition)
|
25
29
|
end
|
26
30
|
|
27
31
|
def self.define(name, domain:, &block)
|
28
|
-
crawler_definition = Class.new(Botz::Macro, &block)
|
32
|
+
crawler_definition = Class.new(Botz::Definition, &block)
|
29
33
|
crawler_definition.domain = domain
|
30
|
-
|
31
|
-
command_class = Class.new(Botz::Command)
|
32
|
-
command_class.definition = crawler_definition
|
33
|
-
crawler_definition.command = command_class
|
34
|
-
|
35
34
|
Crawler.const_set(name.to_s.camelize, crawler_definition)
|
36
|
-
|
35
|
+
crawler_definition
|
37
36
|
end
|
38
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: botz
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-06-
|
11
|
+
date: 2019-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -138,6 +138,7 @@ files:
|
|
138
138
|
- lib/botz/downloader.rb
|
139
139
|
- lib/botz/html_scraper_macro.rb
|
140
140
|
- lib/botz/macro.rb
|
141
|
+
- lib/botz/shell.rb
|
141
142
|
- lib/botz/spider.rb
|
142
143
|
- lib/botz/version.rb
|
143
144
|
homepage: https://github.com/aileron-inc/botz
|