botz 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/botz +5 -12
- data/lib/botz/command.rb +5 -39
- data/lib/botz/macro.rb +1 -2
- data/lib/botz/shell.rb +50 -0
- data/lib/botz/version.rb +1 -1
- data/lib/botz.rb +8 -9
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8f0f5bd0c1efc41fefee5be1ad905144c6c2af0e20c7163ec1ddac75b59b03c
|
4
|
+
data.tar.gz: d1fd0bf8a10ae82a5ca51a81993143964ff20911573785a8cb5d1f3e8facbed5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 582acc3d2f94b021943d517ab62c77d42f759d8c197c4728e2a49ce4c4127f105ce0cbcb3a822e5fb7aefe8d90e8d6234d682e855cff5209d1c12d8f78f31d8d
|
7
|
+
data.tar.gz: 43efe75756b292eb80b8acce890fd37f15845cc9868cca6c46946ab0008d947afe9abbbef23bc71256fb9c4c87ff79a809d81f8f6fcd178ee0add6117fa9290f
|
data/exe/botz
CHANGED
@@ -2,16 +2,9 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
4
|
require 'botz'
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
function spider() {
|
11
|
-
botz #{ARGV[0]} spider $1
|
12
|
-
}
|
13
|
-
function scraper() {
|
14
|
-
botz #{ARGV[0]} scraper $1
|
15
|
-
}
|
16
|
-
SHELL
|
5
|
+
shell = Botz.open(ARGV[0]).shell
|
6
|
+
case ARGV[1]&.to_sym
|
7
|
+
when :spider then shell.spider(ARGV[2])
|
8
|
+
when :scraper then shell.scraper(ARGV[2])
|
9
|
+
when :shell then shell.function
|
17
10
|
end
|
data/lib/botz/command.rb
CHANGED
@@ -1,45 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
#
|
4
|
-
# Class representing a website defined by DSL
|
5
|
-
#
|
6
3
|
class Botz::Command
|
7
|
-
|
4
|
+
attr_reader :definition
|
5
|
+
attr_reader :shell
|
8
6
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
def call(category, name)
|
13
|
-
case category.to_sym
|
14
|
-
when :spider then spider(name)
|
15
|
-
when :scraper then scraper(name)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
# rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
|
20
|
-
def scraper(name)
|
21
|
-
command = scrapers[name.to_sym]
|
22
|
-
while line = STDIN.gets
|
23
|
-
url = line.strip
|
24
|
-
begin
|
25
|
-
command.call(url)
|
26
|
-
rescue
|
27
|
-
STDERR.puts "ERROR #{command} #{url}"
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
# rubocop:enable Lint/AssignmentInCondition, Style/RescueStandardError
|
32
|
-
|
33
|
-
def spider(name)
|
34
|
-
command = spiders[name.to_sym]
|
35
|
-
if File.pipe?(STDIN)
|
36
|
-
STDIN.each_line do |line|
|
37
|
-
start_url = line.strip
|
38
|
-
command.call(start_url) { |url| puts url }
|
39
|
-
end
|
40
|
-
else
|
41
|
-
command.call { |url| puts url }
|
42
|
-
end
|
43
|
-
end
|
7
|
+
def initialize(definition)
|
8
|
+
@definition = definition
|
9
|
+
@shell = Botz::Shell.new(definition: definition)
|
44
10
|
end
|
45
11
|
end
|
data/lib/botz/macro.rb
CHANGED
@@ -3,12 +3,11 @@
|
|
3
3
|
#
|
4
4
|
# Class representing a website defined by DSL
|
5
5
|
#
|
6
|
-
class Botz::
|
6
|
+
class Botz::Definition
|
7
7
|
Output = ->(result) { STDOUT.puts(result.to_json) }
|
8
8
|
|
9
9
|
class_attribute :name
|
10
10
|
class_attribute :domain
|
11
|
-
class_attribute :command
|
12
11
|
class_attribute :spiders, default: {}
|
13
12
|
class_attribute :scrapers, default: {}
|
14
13
|
|
data/lib/botz/shell.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# botz shell access interface
|
5
|
+
#
|
6
|
+
class Botz::Shell
|
7
|
+
include ActiveModel::Model
|
8
|
+
include ActiveModel::Attributes
|
9
|
+
|
10
|
+
attribute :filepath
|
11
|
+
attribute :definition
|
12
|
+
delegate :domain, :spiders, :scrapers, to: :definition
|
13
|
+
|
14
|
+
# rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
|
15
|
+
def scraper(name)
|
16
|
+
command = scrapers[name.to_sym]
|
17
|
+
while line = STDIN.gets
|
18
|
+
url = line.strip
|
19
|
+
begin
|
20
|
+
command.call(url)
|
21
|
+
rescue
|
22
|
+
STDERR.puts "ERROR #{command} #{url}"
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
# rubocop:enable Lint/AssignmentInCondition, Style/RescueStandardError
|
27
|
+
|
28
|
+
def spider(name)
|
29
|
+
command = spiders[name.to_sym]
|
30
|
+
if File.pipe?(STDIN)
|
31
|
+
STDIN.each_line do |line|
|
32
|
+
start_url = line.strip
|
33
|
+
command.call(start_url) { |url| puts url }
|
34
|
+
end
|
35
|
+
else
|
36
|
+
command.call { |url| puts url }
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def function
|
41
|
+
print <<~SHELL
|
42
|
+
function spider() {
|
43
|
+
botz #{filepath} spider $1
|
44
|
+
}
|
45
|
+
function scraper() {
|
46
|
+
botz #{filepath} scraper $1
|
47
|
+
}
|
48
|
+
SHELL
|
49
|
+
end
|
50
|
+
end
|
data/lib/botz/version.rb
CHANGED
data/lib/botz.rb
CHANGED
@@ -10,8 +10,9 @@ require 'mechanize'
|
|
10
10
|
#
|
11
11
|
module Botz
|
12
12
|
extend ActiveSupport::Autoload
|
13
|
+
autoload :Shell
|
13
14
|
autoload :Command
|
14
|
-
autoload :
|
15
|
+
autoload :Definition
|
15
16
|
autoload :Spider
|
16
17
|
autoload :Downloader
|
17
18
|
autoload :HtmlScraperMacro
|
@@ -19,20 +20,18 @@ module Botz
|
|
19
20
|
def self.open(filepath)
|
20
21
|
remove_const(:Crawler) if const_defined?(:Crawler)
|
21
22
|
const_set(:Crawler, Module.new)
|
23
|
+
|
22
24
|
# rubocop:disable Security/Eval
|
23
|
-
eval(File.open(filepath).read)
|
25
|
+
definition = eval(File.open(filepath).read)
|
24
26
|
# rubocop:enable Security/Eval
|
27
|
+
|
28
|
+
Botz::Command.new(filepath: filepath, definition: definition)
|
25
29
|
end
|
26
30
|
|
27
31
|
def self.define(name, domain:, &block)
|
28
|
-
crawler_definition = Class.new(Botz::
|
32
|
+
crawler_definition = Class.new(Botz::Definition, &block)
|
29
33
|
crawler_definition.domain = domain
|
30
|
-
|
31
|
-
command_class = Class.new(Botz::Command)
|
32
|
-
command_class.definition = crawler_definition
|
33
|
-
crawler_definition.command = command_class
|
34
|
-
|
35
34
|
Crawler.const_set(name.to_s.camelize, crawler_definition)
|
36
|
-
|
35
|
+
crawler_definition
|
37
36
|
end
|
38
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: botz
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-06-
|
11
|
+
date: 2019-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -138,6 +138,7 @@ files:
|
|
138
138
|
- lib/botz/downloader.rb
|
139
139
|
- lib/botz/html_scraper_macro.rb
|
140
140
|
- lib/botz/macro.rb
|
141
|
+
- lib/botz/shell.rb
|
141
142
|
- lib/botz/spider.rb
|
142
143
|
- lib/botz/version.rb
|
143
144
|
homepage: https://github.com/aileron-inc/botz
|