skynet-core 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest +19 -0
- data/README.mkdn +12 -0
- data/Rakefile +16 -0
- data/config/database.yml.example +37 -0
- data/config/settings.yml +5 -0
- data/config/settings.yml.example +5 -0
- data/lib/skynet-core/cyberdyne.rb +52 -0
- data/lib/skynet-core/db/hippocampus.rb +12 -0
- data/lib/skynet-core/db/ibrands.rb +162 -0
- data/lib/skynet-core/db/migrate/001_create_actives_and_logs.rb +36 -0
- data/lib/skynet-core/db/migrate/002_pimp_log.rb +22 -0
- data/lib/skynet-core/db/migration.rb +5 -0
- data/lib/skynet-core/extend_string.rb +85 -0
- data/lib/skynet-core/harvest.rb +87 -0
- data/lib/skynet-core.rb +111 -0
- data/skynet-core.gemspec +48 -0
- data/spec/cyberdyne_spec.rb +27 -0
- data/spec/harvest_spec.rb +4 -0
- data/spec/skynet-core_spec.rb +32 -0
- metadata +183 -0
data/Manifest
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Manifest
|
2
|
+
README.mkdn
|
3
|
+
Rakefile
|
4
|
+
config/database.yml.example
|
5
|
+
config/settings.yml
|
6
|
+
config/settings.yml.example
|
7
|
+
lib/skynet-core.rb
|
8
|
+
lib/skynet-core/cyberdyne.rb
|
9
|
+
lib/skynet-core/db/hippocampus.rb
|
10
|
+
lib/skynet-core/db/ibrands.rb
|
11
|
+
lib/skynet-core/db/migrate/001_create_actives_and_logs.rb
|
12
|
+
lib/skynet-core/db/migrate/002_pimp_log.rb
|
13
|
+
lib/skynet-core/db/migration.rb
|
14
|
+
lib/skynet-core/extend_string.rb
|
15
|
+
lib/skynet-core/harvest.rb
|
16
|
+
skynet-core.gemspec
|
17
|
+
spec/cyberdyne_spec.rb
|
18
|
+
spec/harvest_spec.rb
|
19
|
+
spec/skynet-core_spec.rb
|
data/README.mkdn
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Skynet
|
2
|
+
|
3
|
+
Skynet é o robô de data mining do [I-Brands].
|
4
|
+
|
5
|
+
* Autor: Henry Hamon
|
6
|
+
* Copyright: (c) 2010 A2C nós criamos ideias
|
7
|
+
|
8
|
+
## Instalação
|
9
|
+
gem install uuid simpleconsole json nokogiri mechanize htmlentities
|
10
|
+
|
11
|
+
"_A2C recreate the Cyberdyne technology to world domination_"
|
12
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'echoe'
|
4
|
+
|
5
|
+
Echoe.new('skynet-core', '0.0.3') do |s|
|
6
|
+
s.description = "Datamining Bot"
|
7
|
+
s.url = "http://github.com/a2c/skynet-core"
|
8
|
+
s.author = "Henry Hamon"
|
9
|
+
s.email = "henry.hamon@a2c.com.br"
|
10
|
+
s.ignore_pattern = ["tmp/*", "script/*"]
|
11
|
+
s.runtime_dependencies = ["json", "nokogiri",
|
12
|
+
"mechanize", "activerecord", "simpleconsole"]
|
13
|
+
s.development_dependencies = ["rspec"]
|
14
|
+
end
|
15
|
+
|
16
|
+
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
|
@@ -0,0 +1,37 @@
|
|
1
|
+
development:
|
2
|
+
skynet:
|
3
|
+
adapter: mysql
|
4
|
+
database: skynet_staging
|
5
|
+
username: root
|
6
|
+
password: toor
|
7
|
+
encoding: utf8
|
8
|
+
|
9
|
+
ibrands:
|
10
|
+
adapter: mysql
|
11
|
+
database: ibrands_development
|
12
|
+
username: root
|
13
|
+
password: toor
|
14
|
+
encoding: utf8
|
15
|
+
|
16
|
+
|
17
|
+
production:
|
18
|
+
skynet:
|
19
|
+
adapter: mysql
|
20
|
+
encoding: utf8
|
21
|
+
database: skynet
|
22
|
+
username: skynet
|
23
|
+
password: "CHANGE_ME"
|
24
|
+
host: "vps3.clubedeautores.com.br"
|
25
|
+
port: 3300
|
26
|
+
# socket: /var/run/mysqld/mysqld.sock
|
27
|
+
|
28
|
+
ibrands:
|
29
|
+
adapter: mysql
|
30
|
+
encoding: utf8
|
31
|
+
database: ibrands
|
32
|
+
username: ibrands
|
33
|
+
password: "CHANGE_ME"
|
34
|
+
host: "vps3.clubedeautores.com.br"
|
35
|
+
port: 3300
|
36
|
+
# socket: /var/run/mysqld/mysqld.sock
|
37
|
+
|
data/config/settings.yml
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require "#{SKYNET_ROOT}/lib/skynet-core/db/ibrands"
|
2
|
+
|
3
|
+
module Skynet
  extend self

  # Cyberdyne manages the [I-Brands] trackings (tracked networks) for a
  # single network type: it finds the passive trackings, locks the first one
  # for processing and unlocks it afterwards.
  class Cyberdyne
    # The tracking currently being handled (nil until one has been selected).
    attr_reader :track_network_handler

    # network_type - value handed to Network.find_by_classification and to
    #                TrackedNetwork.all_passive to select the network.
    def initialize(network_type)
      @network_type = network_type
    end

    # Selects one specific passive tracking by id for this network type.
    #
    # Returns the tracking, or nil when no passive tracking matches.
    # The result is memoized: once a handler is set, later calls return it
    # regardless of the track_id given.
    def specific_track(track_id)
      network = Network.find_by_classification @network_type
      # 0 is used as a network id that matches nothing when the network is unknown.
      network_id = network.nil? ? 0 : network.id
      @track_network_handler ||= TrackedNetwork.find_by_id_and_network_id_and_state track_id, network_id, 'passive'
    end

    # Returns true when at least one tracking is waiting to be processed.
    def have_track_networks?
      all_track_networks.size > 0
    end

    # Locks the first tracking by switching its state to processing.
    #
    # Returns true when the tracking ended up in the processing state, false
    # otherwise -- including when there is no tracking to lock at all.
    def lock_first_track_network
      track = first_track_network
      return false if track.nil?

      track.processing!
      locked = track.processing?
      # Drop the cached list so the locked tracking is not served again from it.
      @track_networks_list = nil if locked
      locked
    end

    # Unlocks the first tracking, returning it to the passive state.
    #
    # Returns true when the tracking is passive afterwards, false otherwise --
    # including when there is no tracking to unlock.
    def unlock_first_track_network
      track = first_track_network
      return false if track.nil?

      track.mining_is_ok!
      track.passive?
    end

    protected

    # Passive trackings for this network type, cached in @track_networks_list.
    def all_track_networks
      @track_networks_list ||= TrackedNetwork.all_passive @network_type
    end

    # First tracking of the list, memoized as the current handler.
    def first_track_network
      @track_network_handler ||= all_track_networks.first
    end

  end
end
|
52
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require File.expand_path("#{File.dirname(__FILE__)}/../../skynet-core")
|
2
|
+
require 'activerecord'
|
3
|
+
|
4
|
+
Skynet.load_config
|
5
|
+
unless Skynet.database_config.nil?
|
6
|
+
ActiveRecord::Base.establish_connection( Skynet.database_config[Skynet.app_config['enviroment']]['skynet'] )
|
7
|
+
|
8
|
+
class Memory < ActiveRecord::Base
|
9
|
+
validates_presence_of :url
|
10
|
+
validates_uniqueness_of :url, :scope => :tracked_network_id
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
require File.expand_path("#{File.dirname(__FILE__)}/../../skynet-core")
|
2
|
+
require 'activerecord'
|
3
|
+
|
4
|
+
Skynet.load_config
|
5
|
+
|
6
|
+
unless Skynet.database_config.nil?
|
7
|
+
ActiveRecord::Base.establish_connection( Skynet.database_config[Skynet.app_config['enviroment']]['ibrands'] )
|
8
|
+
|
9
|
+
class Network < ActiveRecord::Base
|
10
|
+
has_many :tracked_networks
|
11
|
+
|
12
|
+
validates_presence_of :name
|
13
|
+
validates_presence_of :classification
|
14
|
+
|
15
|
+
attr_accessible :name, :classification
|
16
|
+
end
|
17
|
+
|
18
|
+
class Brand < ActiveRecord::Base
|
19
|
+
has_many :tracked_networks
|
20
|
+
has_many :mining_terms
|
21
|
+
has_many :sieves
|
22
|
+
end
|
23
|
+
|
24
|
+
# A tracking of one brand on one network. The +state+ column drives the
# processing workflow ('passive' -> 'in_processing' -> 'passive' or 'registred').
class TrackedNetwork < ActiveRecord::Base
  belongs_to :brand
  belongs_to :network
  has_many :blogospheres, :dependent => :destroy
  has_many :viral_networks, :dependent => :destroy
  has_many :online_presses, :dependent => :destroy
  has_many :other_networks, :dependent => :destroy
  has_many :limbos, :dependent => :destroy

  # Passive trackings that have a brand, for the network classification given
  # as first argument; least recently updated first, at most 30.
  named_scope :all_passive, lambda { |*args|
    { :conditions => ["brand_id is not null AND tracked_networks.state = ? AND networks.classification = ?",
                      "passive", args.first],
      :order => "tracked_networks.updated_at ASC", :limit => 30, :include => :network }
  }

  # Passive trackings of one brand (args[0]) for a network classification (args[1]).
  named_scope :especific_brand, lambda { |*args|
    { :conditions => ["brand_id = ? AND tracked_networks.state = ? AND networks.classification = ?",
                      args[0], "passive", args[1]],
      :order => "tracked_networks.updated_at ASC", :limit => 30, :include => :network }
  }

  # One tracking by id (args[0]) for a network classification (args[1]).
  # The id column is qualified with the table name because the conditions also
  # reference the networks table, which has an id column of its own.
  named_scope :especific_track, lambda { |*args|
    { :conditions => ["brand_id is not null AND tracked_networks.id = ? AND networks.classification = ?",
                      args[0], args[1]],
      :order => "tracked_networks.updated_at ASC", :limit => 30, :include => :network }
  }

  # Moves a passive tracking to 'in_processing' and saves the record.
  # Returns the result of save.
  def processing!
    self.state = 'in_processing' if state == 'passive'
    save
  end

  def processing?
    state == 'in_processing'
  end

  # Moves a tracking that is being processed back to 'passive' and saves it.
  # Returns the result of save.
  def mining_is_ok!
    self.state = 'passive' if state == 'in_processing'
    save
  end

  def passive?
    state == 'passive'
  end

  # Moves a tracking that is being processed to 'registred' and saves it.
  # The spelling is a stored state value and is kept as is.
  # Returns the result of save.
  def mining_is_not_ok!
    self.state = 'registred' if state == 'in_processing'
    save
  end

  # Sieves of this tracking's brand (memoized); empty when there is no brand.
  # The brand record is passed, not brand_id, because Sieve.by_brand reads
  # +id+ from its argument.
  def sieves
    @sieves ||= (brand.nil? ? [] : Sieve.by_brand(brand))
  end

end
|
73
|
+
|
74
|
+
class OtherNetwork < ActiveRecord::Base
|
75
|
+
belongs_to :tracked_network
|
76
|
+
|
77
|
+
validates_presence_of :source
|
78
|
+
validates_presence_of :topic
|
79
|
+
validates_uniqueness_of :url, :scope => :tracked_network_id
|
80
|
+
end
|
81
|
+
|
82
|
+
class Limbo < ActiveRecord::Base
|
83
|
+
belongs_to :tracked_network
|
84
|
+
|
85
|
+
validates_presence_of :url
|
86
|
+
validates_uniqueness_of :url, :scope => :tracked_network_id
|
87
|
+
end
|
88
|
+
|
89
|
+
class OnlinePress < ActiveRecord::Base
|
90
|
+
belongs_to :tracked_network
|
91
|
+
|
92
|
+
validates_presence_of :url
|
93
|
+
validates_uniqueness_of :url, :scope => :tracked_network_id
|
94
|
+
end
|
95
|
+
|
96
|
+
class ViralNetwork < ActiveRecord::Base
|
97
|
+
belongs_to :tracked_network
|
98
|
+
|
99
|
+
validates_numericality_of :views
|
100
|
+
validates_presence_of :filename, :user
|
101
|
+
validates_uniqueness_of :filename, :scope => :tracked_network_id
|
102
|
+
end
|
103
|
+
|
104
|
+
class Blogosphere < ActiveRecord::Base
|
105
|
+
belongs_to :tracked_network
|
106
|
+
|
107
|
+
validates_presence_of :name
|
108
|
+
validates_uniqueness_of :url, :scope => :tracked_network_id
|
109
|
+
end
|
110
|
+
|
111
|
+
class FamiliarUrl < ActiveRecord::Base
|
112
|
+
validates_presence_of :url
|
113
|
+
|
114
|
+
named_scope :rss_sources, :select => "distinct url", :conditions => ["url_type in (3,4)"]
|
115
|
+
named_scope :url_sources, :conditions => ["url_type not in (3,4)"]
|
116
|
+
end
|
117
|
+
|
118
|
+
|
119
|
+
# URL sources attached to a brand, classified by url_type.
class Sieve < ActiveRecord::Base
  belongs_to :brand

  # NOTE(review): the class method by_brand defined below replaces the
  # class-level finder this scope generates; the scope declaration is kept so
  # the registered scope stays available -- confirm whether it is still needed.
  named_scope :by_brand, lambda { |*args| { :conditions => ["brand_id = ?", args[0]] } }
  named_scope :metagenerator, :conditions => ["url_type = 2"]
  named_scope :rss_sources, :select => "distinct url", :conditions => ["url_type in (3,4)"]

  # Returns all sieves of a brand, optionally restricted to one url_type.
  #
  # brand    - a Brand record or a brand id (TrackedNetwork#sieves passes an id).
  # url_type - optional url_type value; ignored when blank.
  def self.by_brand(brand, url_type = nil)
    # Accept both a record and a raw id; calling +id+ on an Integer is wrong.
    brand_id = brand.is_a?(ActiveRecord::Base) ? brand.id : brand

    if url_type.blank?
      all :conditions => ["brand_id = ?", brand_id]
    else
      all :conditions => ["brand_id = ? AND url_type = ?", brand_id, url_type]
    end
  end
end
|
134
|
+
|
135
|
+
# A mining term of a brand. +term+ holds the search terms and +filter+ holds
# the filters, both as comma/semicolon separated lists.
class MiningTerm < ActiveRecord::Base
  belongs_to :brand
  belongs_to :theme

  # Returns the search terms as an array, split on "," or ";".
  # Entries are not stripped. A nil term yields an empty array.
  def search_terms
    self.term.to_s.split(/,|;/)
  end

  # Returns the filters that start with "-" (exclusions), without the dash.
  def exclude_terms
    split_filters.select { |entry| entry.match(/^-/) }.map { |entry| entry.gsub(/^-/, '') }
  end

  # Returns the filters that do not start with "-".
  def filter_terms
    split_filters.reject { |entry| entry.match(/^-/) }
  end

  private

  # Splits +filter+ on "," or ";" and strips each entry; [] when filter is nil.
  def split_filters
    self.filter.to_s.split(/,|;/).map { |entry| entry.strip }
  end

end
|
162
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'activerecord'
|
3
|
+
require 'lib/skynet/hippocampus'
|
4
|
+
|
5
|
+
#ActiveRecord::Schema.define do
|
6
|
+
class CreateMemoriesAndLogs < ActiveRecord::Migration
|
7
|
+
|
8
|
+
create_table :memories do |t|
|
9
|
+
t.column :brand_id, :integer
|
10
|
+
t.column :tracked_network_id, :integer
|
11
|
+
t.column :network_type, :integer
|
12
|
+
t.column :url, :string
|
13
|
+
t.column :source, :string
|
14
|
+
t.column :post, :text
|
15
|
+
t.column :author, :string
|
16
|
+
t.column :description, :text
|
17
|
+
t.column :slot_1, :integer
|
18
|
+
t.column :slot_2, :integer
|
19
|
+
t.column :slot_3, :integer
|
20
|
+
t.column :integrated, :boolean, :default => false
|
21
|
+
t.column :created_at, :datetime
|
22
|
+
t.column :integrated_at, :datetime
|
23
|
+
end
|
24
|
+
|
25
|
+
add_index :memories, [:url, :tracked_network_id], :unique => true
|
26
|
+
|
27
|
+
|
28
|
+
create_table :logs do |table|
|
29
|
+
table.column :message, :text
|
30
|
+
table.column :pid, :string
|
31
|
+
table.column :tracked_network_id, :integer
|
32
|
+
table.column :created_at, :datetime
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'activerecord'
|
3
|
+
require 'lib/skynet/hippocampus'
|
4
|
+
|
5
|
+
# Replaces the logs.pid column with counters and timing columns.
class PimpLog < ActiveRecord::Migration
  def self.up
    remove_column :logs, :pid
    add_column :logs, :picked, :integer
    add_column :logs, :found, :integer
    add_column :logs, :started_at, :datetime
    add_column :logs, :finished_at, :datetime
  end

  # Restores the previous shape of the logs table. The values removed by +up+
  # are not restored.
  def self.down
    # pid was created as a string column by the migration that created logs.
    add_column :logs, :pid, :string
    remove_column :logs, :picked
    remove_column :logs, :found
    remove_column :logs, :started_at
    remove_column :logs, :finished_at
  end
end
|
22
|
+
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# RemoveAccents version 1.0.3 (c) 2008-2009 Solutions Informatiques Techniconseils inc.
|
2
|
+
#
|
3
|
+
# This module adds 2 methods to the string class.
|
4
|
+
# Up-to-date version and documentation available at:
|
5
|
+
#
|
6
|
+
# http://www.techniconseils.ca/en/scripts-remove-accents-ruby.php
|
7
|
+
#
|
8
|
+
# This script is available under the following license :
|
9
|
+
# Creative Commons Attribution-Share Alike 2.5.
|
10
|
+
#
|
11
|
+
# See full license and details at :
|
12
|
+
# http://creativecommons.org/licenses/by-sa/2.5/ca/
|
13
|
+
#
|
14
|
+
# Version history:
|
15
|
+
# * 1.0.3 : July 23 2009
|
16
|
+
# Corrected some incorrect character codes. Source is now wikipedia at:
|
17
|
+
# http://en.wikipedia.org/wiki/ISO/IEC_8859-1#Related_character_maps
|
18
|
+
# Thanks to Raimon Fernandez for pointing out the incorrect codes.
|
19
|
+
# * 1.0.2 : October 29 2008
|
20
|
+
# Slightly optimized version of urlize - Jonathan Grenier (jgrenier@techniconseils.ca)
|
21
|
+
# * 1.0.1 : October 29 2008
|
22
|
+
# First public revision - Jonathan Grenier (jgrenier@techniconseils.ca)
|
23
|
+
#
|
24
|
+
|
25
|
+
class String
  # The extended characters map used by removeaccents. Each replacement maps
  # to the Unicode code points it stands for; the numbers are packed with
  # pack('U*'), so they must be Unicode code points.
  ACCENTS_MAPPING = {
    'E' => [200,201,202,203],
    'e' => [232,233,234,235],
    'A' => [192,193,194,195,196,197],
    'a' => [224,225,226,227,228,229],
    'C' => [199],
    'c' => [231],
    'O' => [210,211,212,213,214,216],
    'o' => [242,243,244,245,246,248],
    'I' => [204,205,206,207],
    'i' => [236,237,238,239],
    'U' => [217,218,219,220],
    'u' => [249,250,251,252],
    'N' => [209],
    'n' => [241],
    'Y' => [221],
    'y' => [253,255],
    'AE' => [198],  # U+00C6
    'ae' => [230],  # U+00E6
    'OE' => [338],  # U+0152
    'oe' => [339]   # U+0153
  }


  # Remove the accents from the string. Uses String::ACCENTS_MAPPING as the source map.
  #
  # Returns a new string; the receiver is not modified.
  def removeaccents
    str = String.new(self)
    String::ACCENTS_MAPPING.each do |letter, accents|
      packed = accents.pack('U*')
      # The three-argument form (UTF-8 kcode) is only used on Ruby 1.8; later
      # versions take the encoding from the pattern string itself.
      rxp = if RUBY_VERSION < '1.9'
              Regexp.new("[#{packed}]", nil, 'U')
            else
              Regexp.new("[#{packed}]")
            end
      str.gsub!(rxp, letter)
    end

    str
  end


  # Convert a string to a format suitable for a URL without ever using escaped characters.
  # It calls strip, removeaccents, downcase (optional) then removes the spaces (optional)
  # and finally removes any characters matching the default regexp (/[^-_A-Za-z0-9]/).
  #
  # Options
  #
  # * :downcase => call downcase on the string (defaults to true; pass false to keep case)
  # * :convert_spaces => Convert space to underscore (defaults to false)
  # * :regexp => The regexp matching characters that will be converting to an empty string (defaults to /[^-_A-Za-z0-9]/)
  #
  # The options hash is only read, never modified.
  def urlize(options = {})
    # Only a missing (nil) :downcase falls back to true, so an explicit false is honoured.
    downcase       = options[:downcase].nil? ? true : options[:downcase]
    convert_spaces = options[:convert_spaces]
    regexp         = options[:regexp] || /[^-_A-Za-z0-9]/

    str = self.strip.removeaccents
    str.downcase! if downcase
    str.gsub!(/\ /, '_') if convert_spaces
    str.gsub(regexp, '')
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require "#{SKYNET_ROOT}/lib/skynet-core"
|
2
|
+
require "#{SKYNET_ROOT}/lib/skynet-core/db/ibrands"
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
module Skynet
  extend self

  # Base class for harvesters: picks a tracking through Cyberdyne, loads the
  # mining terms of its brand and yields every (mining term, search term) pair
  # to the block supplied by the caller.
  class Harvest
    attr_reader :buffer, :buffer_mining_terms,
                :cost, :results
    attr_accessor :logger

    # network_type - value handed to Cyberdyne.new for every processing run.
    def initialize(network_type)
      @network_type = network_type
      @logger = Hash.new
    end

    # Processes the first pending tracking, if there is one.
    # Returns nil when there is nothing to process.
    def base_process(&block)
      @buffer = Cyberdyne.new @network_type
      internal_process(&block) if @buffer.have_track_networks?
    end

    # Processes one specific tracking, if it can be selected.
    # Returns nil when the tracking is not found.
    def base_specific_track_process(track_id, &block)
      @buffer = Cyberdyne.new @network_type
      internal_process(&block) if @buffer.specific_track(track_id)
    end

    # The cost of a tracking is the number of mining terms multiplied by the
    # total number of search terms (synonyms) across them. Stored in @cost.
    def cost_calculation
      search_term_total = @buffer_mining_terms.inject(0) { |sum, t| sum + t.search_terms.size }
      @cost = @buffer_mining_terms.size * search_term_total
    end

    # Builds a case-insensitive alternation pattern (as a String) from terms.
    #
    # terms         - list of term strings.
    # boundarie     - text placed in front of every term (default "").
    # remove_accent - when true, removeaccents is called on each term first.
    #
    # The characters + - / ? * \ and | are stripped from every term.
    # Returns e.g. "(?i)(foo|bar)"; "(?i)()" for an empty list.
    def generate_regex(terms, boundarie = "", remove_accent = false)
      alternatives = terms.map do |term|
        term = term.removeaccents if remove_accent
        "#{boundarie}#{term.gsub(/[+\-\/?*\\|]/, '')}"
      end

      "(?i)(#{alternatives.join('|')})"
    end

    def find
      raise "Abstract method called!"
    end

    protected

    # Locks the tracking, yields each non-empty search term together with its
    # mining term, then unlocks the tracking -- also when the block raises, so
    # a failure does not leave the tracking locked.
    #
    # Returns the result of unlock_first_track_network.
    def internal_process
      @buffer.lock_first_track_network
      begin
        load_mining_terms
        cost_calculation
        @buffer_mining_terms.each do |mining_term|
          mining_term.search_terms.each do |search_term|
            next if search_term.nil? || search_term.empty?

            # Results are reset before every yielded search term.
            @results = []
            yield mining_term, search_term
          end
        end
      ensure
        unlocked = @buffer.unlock_first_track_network
      end
      unlocked
    end

    # Mining terms of the current tracking's brand (memoized).
    # NOTE(review): the memo is not reset when @buffer is replaced, so reusing
    # one Harvest instance for several runs keeps the first brand's terms --
    # confirm whether instances are meant to be single-use.
    def load_mining_terms
      @buffer_mining_terms ||= @buffer.track_network_handler.brand.mining_terms
    end

    # Language of the current tracking's brand (memoized).
    def load_language
      @buffer_language ||= @buffer.track_network_handler.brand.language
    end

  end #class

end #module
|
data/lib/skynet-core.rb
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "yaml"
|
3
|
+
|
4
|
+
SKYNET_ROOT = File.expand_path("#{File.dirname(__FILE__)}/..") unless defined?(SKYNET_ROOT)
|
5
|
+
|
6
|
+
Dir[File.dirname(__FILE__) + '/skynet-core/*.rb'].each {|file| require file }
|
7
|
+
|
8
|
+
# Defines our SKYNET_ENV
|
9
|
+
SKYNET_ENV = ENV["SKYNET_ENV"] ||= ENV["RACK_ENV"] ||= "development" unless defined?(SKYNET_ENV)
|
10
|
+
#
|
11
|
+
# Core helpers of the gem: environment, root path, configuration loading and
# dependency loading.
module Skynet

  attr_reader :_app_config, :_database_config

  class ApplicationLoadError < RuntimeError #:nodoc:
  end

  class << self

    # Current environment as a lowercase symbol, derived from SKYNET_ENV.
    def env
      @_env ||= SKYNET_ENV.to_s.downcase.to_sym
    end

    # Absolute path built from SKYNET_ROOT and the given path segments.
    def root(*args)
      File.expand_path(File.join(SKYNET_ROOT, *args))
    end

    # Default encoding to UTF8 if it has not already been set to something else.
    # Only touches $KCODE on Ruby versions older than 1.9. Always returns nil.
    def set_encoding
      unless RUBY_VERSION >= '1.9'
        $KCODE = 'U' if $KCODE.nil? || $KCODE.empty? || $KCODE == 'NONE'
      end
      nil
    end

    #
    # def bundle
    #   return :locked if File.exist?(root('.bundle/environment.rb'))
    #   return :unlocked if File.exist?(root("Gemfile"))
    # end

    # Loads config/settings.yml and config/database.yml (both memoized).
    #
    # Returns true when both were loaded.
    # Raises ApplicationLoadError (a RuntimeError) with the message
    # "Config files not found" when either file is missing or empty.
    def load_config
      app_config
      database_config

      if @_app_config.nil? || @_database_config.nil?
        raise ApplicationLoadError, "Config files not found"
      end
      true
    end

    # Parsed config/settings.yml, or nil when the file does not exist.
    def app_config
      @_app_config ||= set_app_config
    end

    # Parsed config/database.yml, or nil when the file does not exist.
    def database_config
      @_database_config ||= set_database_config
    end

    # Requires every file matching the given glob patterns, retrying files that
    # fail until all of them load. This lets files that depend on each other
    # load regardless of their order.
    #
    # Raises the last error seen when a whole pass loads nothing new.
    # Returns nil.
    def require_dependencies(*paths)
      pending = paths.map { |path| Dir[path] }.flatten

      until pending.empty?
        last_error = nil
        still_failing = []

        pending.each do |file|
          begin
            require file
          rescue StandardError, ScriptError => e
            # Keep the file for the next pass; a file it depends on may not
            # have been loaded yet.
            last_error = e
            still_failing << file
          end
        end

        # No progress during this pass: retrying would loop forever.
        raise last_error if still_failing.size == pending.size
        pending = still_failing
      end
    end
    alias :require_dependency :require_dependencies

    ##
    # Loads (with Kernel#load, so files are evaluated again on every call)
    # all files matching the given glob patterns.
    #
    def load_dependencies(*paths)
      paths.each do |path|
        Dir[path].each { |file| load(file) }
      end
    end
    alias :load_dependency :load_dependencies


    private

      # Reads config/settings.yml under the root; nil when it is absent.
      def set_app_config
        path = root("config/settings.yml")
        YAML.load_file(path) if File.exist?(path)
      end

      # Reads config/database.yml under the root; nil when it is absent.
      def set_database_config
        path = root("config/database.yml")
        YAML.load_file(path) if File.exist?(path)
      end

  end # self
end
|
data/skynet-core.gemspec
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{skynet-core}
|
5
|
+
s.version = "0.0.3"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Henry Hamon"]
|
9
|
+
s.date = %q{2010-06-09}
|
10
|
+
s.description = %q{Datamining Bot}
|
11
|
+
s.email = %q{henry.hamon@a2c.com.br}
|
12
|
+
s.extra_rdoc_files = ["README.mkdn", "lib/skynet-core.rb", "lib/skynet-core/cyberdyne.rb", "lib/skynet-core/db/hippocampus.rb", "lib/skynet-core/db/ibrands.rb", "lib/skynet-core/db/migrate/001_create_actives_and_logs.rb", "lib/skynet-core/db/migrate/002_pimp_log.rb", "lib/skynet-core/db/migration.rb", "lib/skynet-core/extend_string.rb", "lib/skynet-core/harvest.rb"]
|
13
|
+
s.files = ["Manifest", "README.mkdn", "Rakefile", "config/database.yml.example", "config/settings.yml", "config/settings.yml.example", "lib/skynet-core.rb", "lib/skynet-core/cyberdyne.rb", "lib/skynet-core/db/hippocampus.rb", "lib/skynet-core/db/ibrands.rb", "lib/skynet-core/db/migrate/001_create_actives_and_logs.rb", "lib/skynet-core/db/migrate/002_pimp_log.rb", "lib/skynet-core/db/migration.rb", "lib/skynet-core/extend_string.rb", "lib/skynet-core/harvest.rb", "skynet-core.gemspec", "spec/cyberdyne_spec.rb", "spec/harvest_spec.rb", "spec/skynet-core_spec.rb"]
|
14
|
+
s.homepage = %q{http://github.com/a2c/skynet-core}
|
15
|
+
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Skynet-core", "--main", "README.mkdn"]
|
16
|
+
s.require_paths = ["lib"]
|
17
|
+
s.rubyforge_project = %q{skynet-core}
|
18
|
+
s.rubygems_version = %q{1.3.7}
|
19
|
+
s.summary = %q{Datamining Bot}
|
20
|
+
|
21
|
+
if s.respond_to? :specification_version then
|
22
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
23
|
+
s.specification_version = 3
|
24
|
+
|
25
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
26
|
+
s.add_runtime_dependency(%q<json>, [">= 0"])
|
27
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
28
|
+
s.add_runtime_dependency(%q<mechanize>, [">= 0"])
|
29
|
+
s.add_runtime_dependency(%q<activerecord>, [">= 0"])
|
30
|
+
s.add_runtime_dependency(%q<simpleconsole>, [">= 0"])
|
31
|
+
s.add_development_dependency(%q<rspec>, [">= 0"])
|
32
|
+
else
|
33
|
+
s.add_dependency(%q<json>, [">= 0"])
|
34
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
35
|
+
s.add_dependency(%q<mechanize>, [">= 0"])
|
36
|
+
s.add_dependency(%q<activerecord>, [">= 0"])
|
37
|
+
s.add_dependency(%q<simpleconsole>, [">= 0"])
|
38
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
39
|
+
end
|
40
|
+
else
|
41
|
+
s.add_dependency(%q<json>, [">= 0"])
|
42
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
43
|
+
s.add_dependency(%q<mechanize>, [">= 0"])
|
44
|
+
s.add_dependency(%q<activerecord>, [">= 0"])
|
45
|
+
s.add_dependency(%q<simpleconsole>, [">= 0"])
|
46
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'spec'
|
3
|
+
|
4
|
+
require File.dirname(__FILE__) + '/../lib/skynet-core'
|
5
|
+
|
6
|
+
describe "cyberdyne" do
|
7
|
+
before(:each) do
|
8
|
+
@cyberdyne = Skynet::Cyberdyne.new 7
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should have track" do
|
12
|
+
@cyberdyne.have_track_networks?.should be_true
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should have lock the first track" do
|
16
|
+
@cyberdyne.lock_first_track_network.should be_true
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should have unlock the track" do
|
20
|
+
@cyberdyne.unlock_first_track_network.should be_true
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should load the track" do
|
24
|
+
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'spec'
|
3
|
+
|
4
|
+
require File.dirname(__FILE__) + '/../lib/skynet-core'
|
5
|
+
|
6
|
+
describe "skynet-core" do

  # it "check some global methods" do
  #   should_validate_presence_of Skynet, :root
  # end

  # it "should not find config files" do
  #
  #   if File.exist?(File.dirname(__FILE__) + '/../config/settings.yml')
  #     File.rename(File.dirname(__FILE__) + '/../config/settings.yml', File.dirname(__FILE__) + '/../config/settings.yml.example')
  #   end
  #
  #   lambda{Skynet.load_config}.should raise_error(RuntimeError, "Config files not found")
  #
  # end

  it "should find config files" do
    config_dir = File.dirname(__FILE__) + '/../config'
    example    = "#{config_dir}/settings.yml.example"
    settings   = "#{config_dir}/settings.yml"

    # Seed settings.yml from the example only when it is missing, so existing
    # settings are never overwritten and the example file stays in place.
    if File.exist?(example) && !File.exist?(settings)
      File.open(settings, 'w') { |f| f.write(File.read(example)) }
    end

    Skynet.load_config.should be_true

  end

end
|
metadata
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: skynet-core
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 25
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Henry Hamon
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-06-09 00:00:00 -03:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: json
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: nokogiri
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :runtime
|
48
|
+
version_requirements: *id002
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: mechanize
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
type: :runtime
|
62
|
+
version_requirements: *id003
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: activerecord
|
65
|
+
prerelease: false
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 3
|
72
|
+
segments:
|
73
|
+
- 0
|
74
|
+
version: "0"
|
75
|
+
type: :runtime
|
76
|
+
version_requirements: *id004
|
77
|
+
- !ruby/object:Gem::Dependency
|
78
|
+
name: simpleconsole
|
79
|
+
prerelease: false
|
80
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
hash: 3
|
86
|
+
segments:
|
87
|
+
- 0
|
88
|
+
version: "0"
|
89
|
+
type: :runtime
|
90
|
+
version_requirements: *id005
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: rspec
|
93
|
+
prerelease: false
|
94
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
hash: 3
|
100
|
+
segments:
|
101
|
+
- 0
|
102
|
+
version: "0"
|
103
|
+
type: :development
|
104
|
+
version_requirements: *id006
|
105
|
+
description: Datamining Bot
|
106
|
+
email: henry.hamon@a2c.com.br
|
107
|
+
executables: []
|
108
|
+
|
109
|
+
extensions: []
|
110
|
+
|
111
|
+
extra_rdoc_files:
|
112
|
+
- README.mkdn
|
113
|
+
- lib/skynet-core.rb
|
114
|
+
- lib/skynet-core/cyberdyne.rb
|
115
|
+
- lib/skynet-core/db/hippocampus.rb
|
116
|
+
- lib/skynet-core/db/ibrands.rb
|
117
|
+
- lib/skynet-core/db/migrate/001_create_actives_and_logs.rb
|
118
|
+
- lib/skynet-core/db/migrate/002_pimp_log.rb
|
119
|
+
- lib/skynet-core/db/migration.rb
|
120
|
+
- lib/skynet-core/extend_string.rb
|
121
|
+
- lib/skynet-core/harvest.rb
|
122
|
+
files:
|
123
|
+
- Manifest
|
124
|
+
- README.mkdn
|
125
|
+
- Rakefile
|
126
|
+
- config/database.yml.example
|
127
|
+
- config/settings.yml
|
128
|
+
- config/settings.yml.example
|
129
|
+
- lib/skynet-core.rb
|
130
|
+
- lib/skynet-core/cyberdyne.rb
|
131
|
+
- lib/skynet-core/db/hippocampus.rb
|
132
|
+
- lib/skynet-core/db/ibrands.rb
|
133
|
+
- lib/skynet-core/db/migrate/001_create_actives_and_logs.rb
|
134
|
+
- lib/skynet-core/db/migrate/002_pimp_log.rb
|
135
|
+
- lib/skynet-core/db/migration.rb
|
136
|
+
- lib/skynet-core/extend_string.rb
|
137
|
+
- lib/skynet-core/harvest.rb
|
138
|
+
- skynet-core.gemspec
|
139
|
+
- spec/cyberdyne_spec.rb
|
140
|
+
- spec/harvest_spec.rb
|
141
|
+
- spec/skynet-core_spec.rb
|
142
|
+
has_rdoc: true
|
143
|
+
homepage: http://github.com/a2c/skynet-core
|
144
|
+
licenses: []
|
145
|
+
|
146
|
+
post_install_message:
|
147
|
+
rdoc_options:
|
148
|
+
- --line-numbers
|
149
|
+
- --inline-source
|
150
|
+
- --title
|
151
|
+
- Skynet-core
|
152
|
+
- --main
|
153
|
+
- README.mkdn
|
154
|
+
require_paths:
|
155
|
+
- lib
|
156
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
157
|
+
none: false
|
158
|
+
requirements:
|
159
|
+
- - ">="
|
160
|
+
- !ruby/object:Gem::Version
|
161
|
+
hash: 3
|
162
|
+
segments:
|
163
|
+
- 0
|
164
|
+
version: "0"
|
165
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
166
|
+
none: false
|
167
|
+
requirements:
|
168
|
+
- - ">="
|
169
|
+
- !ruby/object:Gem::Version
|
170
|
+
hash: 11
|
171
|
+
segments:
|
172
|
+
- 1
|
173
|
+
- 2
|
174
|
+
version: "1.2"
|
175
|
+
requirements: []
|
176
|
+
|
177
|
+
rubyforge_project: skynet-core
|
178
|
+
rubygems_version: 1.3.7
|
179
|
+
signing_key:
|
180
|
+
specification_version: 3
|
181
|
+
summary: Datamining Bot
|
182
|
+
test_files: []
|
183
|
+
|