skynet-core 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest +19 -0
- data/README.mkdn +12 -0
- data/Rakefile +16 -0
- data/config/database.yml.example +37 -0
- data/config/settings.yml +5 -0
- data/config/settings.yml.example +5 -0
- data/lib/skynet-core/cyberdyne.rb +52 -0
- data/lib/skynet-core/db/hippocampus.rb +12 -0
- data/lib/skynet-core/db/ibrands.rb +162 -0
- data/lib/skynet-core/db/migrate/001_create_actives_and_logs.rb +36 -0
- data/lib/skynet-core/db/migrate/002_pimp_log.rb +22 -0
- data/lib/skynet-core/db/migration.rb +5 -0
- data/lib/skynet-core/extend_string.rb +85 -0
- data/lib/skynet-core/harvest.rb +87 -0
- data/lib/skynet-core.rb +111 -0
- data/skynet-core.gemspec +48 -0
- data/spec/cyberdyne_spec.rb +27 -0
- data/spec/harvest_spec.rb +4 -0
- data/spec/skynet-core_spec.rb +32 -0
- metadata +183 -0
data/Manifest
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Manifest
|
2
|
+
README.mkdn
|
3
|
+
Rakefile
|
4
|
+
config/database.yml.example
|
5
|
+
config/settings.yml
|
6
|
+
config/settings.yml.example
|
7
|
+
lib/skynet-core.rb
|
8
|
+
lib/skynet-core/cyberdyne.rb
|
9
|
+
lib/skynet-core/db/hippocampus.rb
|
10
|
+
lib/skynet-core/db/ibrands.rb
|
11
|
+
lib/skynet-core/db/migrate/001_create_actives_and_logs.rb
|
12
|
+
lib/skynet-core/db/migrate/002_pimp_log.rb
|
13
|
+
lib/skynet-core/db/migration.rb
|
14
|
+
lib/skynet-core/extend_string.rb
|
15
|
+
lib/skynet-core/harvest.rb
|
16
|
+
skynet-core.gemspec
|
17
|
+
spec/cyberdyne_spec.rb
|
18
|
+
spec/harvest_spec.rb
|
19
|
+
spec/skynet-core_spec.rb
|
data/README.mkdn
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Skynet
|
2
|
+
|
3
|
+
Skynet é o robô de data mining do [I-Brands].
|
4
|
+
|
5
|
+
* Autor: Henry Hamon
|
6
|
+
* Copyright: (c) 2010 A2C nós criamos ideias
|
7
|
+
|
8
|
+
## Instalação
|
9
|
+
gem install uuid simpleconsole json nokogiri mechanize htmlentities
|
10
|
+
|
11
|
+
"_A2C recreates the Cyberdyne technology for world domination_"
|
12
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'echoe'
|
4
|
+
|
5
|
+
Echoe.new('skynet-core', '0.0.3') do |s|
|
6
|
+
s.description = "Datamining Bot"
|
7
|
+
s.url = "http://github.com/a2c/skynet-core"
|
8
|
+
s.author = "Henry Hamon"
|
9
|
+
s.email = "henry.hamon@a2c.com.br"
|
10
|
+
s.ignore_pattern = ["tmp/*", "script/*"]
|
11
|
+
s.runtime_dependencies = ["json", "nokogiri",
|
12
|
+
"mechanize", "activerecord", "simpleconsole"]
|
13
|
+
s.development_dependencies = ["rspec"]
|
14
|
+
end
|
15
|
+
|
16
|
+
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
|
@@ -0,0 +1,37 @@
|
|
1
|
+
development:
|
2
|
+
skynet:
|
3
|
+
adapter: mysql
|
4
|
+
database: skynet_staging
|
5
|
+
username: root
|
6
|
+
password: toor
|
7
|
+
encoding: utf8
|
8
|
+
|
9
|
+
ibrands:
|
10
|
+
adapter: mysql
|
11
|
+
database: ibrands_development
|
12
|
+
username: root
|
13
|
+
password: toor
|
14
|
+
encoding: utf8
|
15
|
+
|
16
|
+
|
17
|
+
production:
|
18
|
+
skynet:
|
19
|
+
adapter: mysql
|
20
|
+
encoding: utf8
|
21
|
+
database: skynet
|
22
|
+
username: skynet
|
23
|
+
password: "MXQuE4KW9NhjEr7t"
|
24
|
+
host: "vps3.clubedeautores.com.br"
|
25
|
+
port: 3300
|
26
|
+
# socket: /var/run/mysqld/mysqld.sock
|
27
|
+
|
28
|
+
ibrands:
|
29
|
+
adapter: mysql
|
30
|
+
encoding: utf8
|
31
|
+
database: ibrands
|
32
|
+
username: ibrands
|
33
|
+
password: "MXQuE4KW9NhjEr7t"
|
34
|
+
host: "vps3.clubedeautores.com.br"
|
35
|
+
port: 3300
|
36
|
+
# socket: /var/run/mysqld/mysqld.sock
|
37
|
+
|
data/config/settings.yml
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require "#{SKYNET_ROOT}/lib/skynet-core/db/ibrands"
|
2
|
+
|
3
|
+
module Skynet
  extend self

  # Cyberdyne manages the [I-Brands] trackings (TrackedNetwork records) for
  # a single network classification.
  class Cyberdyne
    # The tracking currently being handled (set by #specific_track or lazily
    # by the lock/unlock helpers); nil until one of them has run.
    attr_reader :track_network_handler

    # network_type - value compared against Network#classification and handed
    #                to TrackedNetwork.all_passive.
    def initialize(network_type)
      @network_type = network_type
    end

    # Works on one specific tracking.
    #
    # Looks up the passive tracking with the given id that belongs to the
    # network of this classification. The result is memoized, so later calls
    # return the first tracking found regardless of track_id.
    #
    # Returns the tracking, or nil when nothing matches.
    def specific_track(track_id)
      network = Network.find_by_classification @network_type
      # 0 is used as the network id when the classification is unknown.
      @track_network_handler ||= TrackedNetwork.find_by_id_and_network_id_and_state track_id, network.nil? ? 0 : network.id, 'passive'
    end

    # Checks whether there is any tracking waiting to be processed.
    def have_track_networks?
      all_track_networks.size > 0
    end

    # Locks the first tracking by switching its state to processing.
    #
    # Returns true when the tracking ended up in the processing state, false
    # otherwise (including when there is no tracking to lock).
    def lock_first_track_network
      track = first_track_network
      return false if track.nil?

      track.processing!
      # Drop the cached list once the lock succeeded; it is rebuilt by the
      # next call to all_track_networks.
      @track_networks_list = nil if track.processing?
      track.processing?
    end

    # Unlocks the first tracking.
    #
    # Returns true when the tracking is passive afterwards, false otherwise
    # (including when there is no tracking to unlock).
    def unlock_first_track_network
      track = first_track_network
      return false if track.nil?

      track.mining_is_ok!
      track.passive?
    end

    protected

    # Passive trackings for this network type, cached in @track_networks_list.
    def all_track_networks
      @track_networks_list ||= TrackedNetwork.all_passive @network_type
    end

    # First tracking of the list, memoized in @track_network_handler.
    def first_track_network
      @track_network_handler ||= all_track_networks.first
    end

  end
end
|
52
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require File.expand_path("#{File.dirname(__FILE__)}/../../skynet-core")
|
2
|
+
require 'activerecord'
|
3
|
+
|
4
|
+
Skynet.load_config
|
5
|
+
# Only connect and define the model when a database configuration was loaded.
unless Skynet.database_config.nil?
  # The settings key is spelled 'enviroment' in the configuration file.
  environment_settings = Skynet.database_config[Skynet.app_config['enviroment']]
  ActiveRecord::Base.establish_connection(environment_settings['skynet'])

  # A mined item, stored at most once per URL and tracked network.
  class Memory < ActiveRecord::Base
    validates_presence_of(:url)
    validates_uniqueness_of(:url, :scope => :tracked_network_id)
  end
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
require File.expand_path("#{File.dirname(__FILE__)}/../../skynet-core")
|
2
|
+
require 'activerecord'
|
3
|
+
|
4
|
+
Skynet.load_config
|
5
|
+
|
6
|
+
unless Skynet.database_config.nil?
|
7
|
+
ActiveRecord::Base.establish_connection( Skynet.database_config[Skynet.app_config['enviroment']]['ibrands'] )
|
8
|
+
|
9
|
+
# A network that brands can be tracked on, identified by name and classification.
class Network < ActiveRecord::Base
  has_many(:tracked_networks)

  # Both attributes are mandatory.
  validates_presence_of(:name)
  validates_presence_of(:classification)

  # Only these attributes may be mass-assigned.
  attr_accessible(:name, :classification)
end
|
17
|
+
|
18
|
+
# A monitored brand together with its trackings, mining terms and sieves.
class Brand < ActiveRecord::Base
  has_many(:tracked_networks)
  has_many(:mining_terms)
  has_many(:sieves)
end
|
23
|
+
|
24
|
+
# A brand being tracked on one network. The +state+ column drives the
# processing life cycle handled by the bang methods below
# ('passive', 'in_processing', 'registred').
class TrackedNetwork < ActiveRecord::Base
  belongs_to :brand
  belongs_to :network
  has_many :blogospheres, :dependent => :destroy
  has_many :viral_networks, :dependent => :destroy
  has_many :online_presses, :dependent => :destroy
  has_many :other_networks, :dependent => :destroy
  has_many :limbos, :dependent => :destroy

  # Up to 30 passive trackings that have a brand, for the given network
  # classification (first argument), least recently updated first.
  named_scope :all_passive, lambda {|*args| { :conditions => ["brand_id is not null AND tracked_networks.state = ?
    AND networks.classification = ?", "passive", (args.first || nil)],
    :order => "tracked_networks.updated_at ASC", :limit => 30, :include => :network}}

  # Same as all_passive, restricted to one brand.
  # Arguments: brand id, network classification.
  named_scope :especific_brand, lambda {|*args| { :conditions => ["brand_id = ? AND tracked_networks.state = ?
    AND networks.classification = ?", (args[0] || nil), "passive", (args[1] || nil)],
    :order => "tracked_networks.updated_at ASC", :limit => 30, :include => :network}}

  # One tracking by id, for the given network classification.
  # Arguments: tracking id, network classification.
  # The id column is qualified with its table name because the scope also
  # references the networks table, which has an id column of its own.
  named_scope :especific_track, lambda {|*args| { :conditions => ["brand_id is not null AND tracked_networks.id = ?
    AND networks.classification = ?", (args[0] || nil), (args[1] || nil)],
    :order => "tracked_networks.updated_at ASC", :limit => 30, :include => :network}}

  # Moves a passive tracking to 'in_processing' and saves it.
  # Returns the result of #save.
  def processing!
    self.state = 'in_processing' if self.state == 'passive'
    self.save
  end

  def processing?
    self.state == 'in_processing'
  end

  # Moves a tracking that is being processed back to 'passive' and saves it.
  # Returns the result of #save.
  def mining_is_ok!
    self.state = 'passive' if self.state == 'in_processing'
    self.save
  end

  def passive?
    self.state == 'passive'
  end

  # Moves a tracking that is being processed to 'registred' (spelling as
  # stored in the database) and saves it. Returns the result of #save.
  def mining_is_not_ok!
    self.state = 'registred' if self.state == 'in_processing'
    self.save
  end

  # Sieves of this tracking's brand, memoized per instance.
  # NOTE(review): Sieve.by_brand receives the raw brand id here - confirm that
  # it accepts an id and not only a Brand record.
  def sieves
    @sieves ||= Sieve.by_brand self.brand_id
  end

end
|
73
|
+
|
74
|
+
# A result found on a network without a dedicated model.
class OtherNetwork < ActiveRecord::Base
  belongs_to(:tracked_network)

  validates_presence_of(:source)
  validates_presence_of(:topic)
  # A URL may appear only once per tracking.
  validates_uniqueness_of(:url, :scope => :tracked_network_id)
end
|
81
|
+
|
82
|
+
# A URL attached to a tracking; unique per tracking.
class Limbo < ActiveRecord::Base
  belongs_to(:tracked_network)

  validates_presence_of(:url)
  validates_uniqueness_of(:url, :scope => :tracked_network_id)
end
|
88
|
+
|
89
|
+
# An online press URL attached to a tracking; unique per tracking.
class OnlinePress < ActiveRecord::Base
  belongs_to(:tracked_network)

  validates_presence_of(:url)
  validates_uniqueness_of(:url, :scope => :tracked_network_id)
end
|
95
|
+
|
96
|
+
# A file found on a viral network, with its view count and owner.
class ViralNetwork < ActiveRecord::Base
  belongs_to(:tracked_network)

  validates_numericality_of(:views)
  validates_presence_of(:filename, :user)
  # A filename may appear only once per tracking.
  validates_uniqueness_of(:filename, :scope => :tracked_network_id)
end
|
103
|
+
|
104
|
+
# A blog attached to a tracking; its URL is unique per tracking.
class Blogosphere < ActiveRecord::Base
  belongs_to(:tracked_network)

  validates_presence_of(:name)
  validates_uniqueness_of(:url, :scope => :tracked_network_id)
end
|
110
|
+
|
111
|
+
# A known URL, split by url_type into RSS sources (types 3 and 4) and the rest.
class FamiliarUrl < ActiveRecord::Base
  validates_presence_of(:url)

  # Distinct URLs whose url_type is 3 or 4.
  named_scope(:rss_sources, :select => "distinct url", :conditions => ["url_type in (3,4)"])
  # Every URL whose url_type is neither 3 nor 4.
  named_scope(:url_sources, :conditions => ["url_type not in (3,4)"])
end
|
117
|
+
|
118
|
+
|
119
|
+
# A per-brand URL filter, classified by url_type.
class Sieve < ActiveRecord::Base
  belongs_to :brand

  # Kept so the scope stays registered for scope chains; the class method
  # defined below replaces it for direct Sieve.by_brand calls.
  named_scope :by_brand, lambda {|*args| { :conditions => ["brand_id = ?", (args[0] || nil)]}}
  named_scope :metagenerator, :conditions => ["url_type = 2"]
  named_scope :rss_sources, :select => "distinct url", :conditions => ["url_type in (3,4)"]

  # All sieves of a brand, optionally restricted to one url_type.
  #
  # brand    - a Brand record or a raw brand id (callers in this file pass
  #            TrackedNetwork#brand_id, so both forms must work).
  # url_type - optional url_type to filter on; blank means "any".
  #
  # Returns an Array of Sieve records.
  def self.by_brand(brand, url_type = nil)
    brand_id = brand.is_a?(ActiveRecord::Base) ? brand.id : brand

    if url_type.blank?
      self.all :conditions => ["brand_id = ?", brand_id]
    else
      self.all :conditions => ["brand_id = ? AND url_type = ?", brand_id, url_type]
    end
  end
end
|
134
|
+
|
135
|
+
# A mining term of a brand. +term+ holds the search terms and +filter+ holds
# the filters, both as lists separated by "," or ";". Filters prefixed with
# "-" are exclusions.
class MiningTerm < ActiveRecord::Base
  belongs_to :brand
  belongs_to :theme

  # Search terms split on "," or ";" (entries are not stripped).
  # Returns an empty Array when +term+ is nil.
  def search_terms
    self.term.to_s.split(/,|;/)
  end

  # Filters that start with "-", returned without that leading "-".
  def exclude_terms
    stripped_filters.select { |entry| entry.match(/^-/) }.map { |entry| entry.gsub(/^-/, '') }
  end

  # Filters that do not start with "-".
  def filter_terms
    stripped_filters.reject { |entry| entry.match(/^-/) }
  end

  private

  # +filter+ split on "," or ";" with surrounding whitespace removed from
  # every entry; an empty Array when +filter+ is nil.
  def stripped_filters
    self.filter.to_s.split(/,|;/).map { |entry| entry.strip }
  end

end
|
162
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'activerecord'
|
3
|
+
require 'lib/skynet/hippocampus'
|
4
|
+
|
5
|
+
#ActiveRecord::Schema.define do
|
6
|
+
# Creates the +memories+ and +logs+ tables.
#
# The schema statements live in self.up / self.down so that they run only
# when the migration is applied or reverted, not whenever this file is loaded.
class CreateMemoriesAndLogs < ActiveRecord::Migration

  def self.up
    create_table :memories do |t|
      t.column :brand_id, :integer
      t.column :tracked_network_id, :integer
      t.column :network_type, :integer
      t.column :url, :string
      t.column :source, :string
      t.column :post, :text
      t.column :author, :string
      t.column :description, :text
      t.column :slot_1, :integer
      t.column :slot_2, :integer
      t.column :slot_3, :integer
      t.column :integrated, :boolean, :default => false
      t.column :created_at, :datetime
      t.column :integrated_at, :datetime
    end

    # A URL is stored at most once per tracked network.
    add_index :memories, [:url, :tracked_network_id], :unique => true

    create_table :logs do |table|
      table.column :message, :text
      table.column :pid, :string
      table.column :tracked_network_id, :integer
      table.column :created_at, :datetime
    end
  end

  # Dropping the tables also removes the index created in self.up.
  def self.down
    drop_table :logs
    drop_table :memories
  end

end
|
36
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'activerecord'
|
3
|
+
require 'lib/skynet/hippocampus'
|
4
|
+
|
5
|
+
# Replaces the +pid+ column of +logs+ with counters and timestamps.
class PimpLog < ActiveRecord::Migration
  def self.up
    remove_column :logs, :pid
    add_column :logs, :picked, :integer
    add_column :logs, :found, :integer
    add_column :logs, :started_at, :datetime
    add_column :logs, :finished_at, :datetime
  end

  def self.down
    # pid is restored as :string, the type it had when the logs table was created.
    add_column :logs, :pid, :string
    remove_column :logs, :picked
    remove_column :logs, :found
    remove_column :logs, :started_at
    remove_column :logs, :finished_at
  end
end
|
22
|
+
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# RemoveAccents version 1.0.3 (c) 2008-2009 Solutions Informatiques Techniconseils inc.
|
2
|
+
#
|
3
|
+
# This module adds 2 methods to the string class.
|
4
|
+
# Up-to-date version and documentation available at:
|
5
|
+
#
|
6
|
+
# http://www.techniconseils.ca/en/scripts-remove-accents-ruby.php
|
7
|
+
#
|
8
|
+
# This script is available under the following license :
|
9
|
+
# Creative Commons Attribution-Share Alike 2.5.
|
10
|
+
#
|
11
|
+
# See full license and details at :
|
12
|
+
# http://creativecommons.org/licenses/by-sa/2.5/ca/
|
13
|
+
#
|
14
|
+
# Version history:
|
15
|
+
# * 1.0.3 : July 23 2009
|
16
|
+
# Corrected some incorrect character codes. Source is now wikipedia at:
|
17
|
+
# http://en.wikipedia.org/wiki/ISO/IEC_8859-1#Related_character_maps
|
18
|
+
# Thanks to Raimon Fernandez for pointing out the incorrect codes.
|
19
|
+
# * 1.0.2 : October 29 2008
|
20
|
+
# Slightly optimized version of urlize - Jonathan Grenier (jgrenier@techniconseils.ca)
|
21
|
+
# * 1.0.1 : October 29 2008
|
22
|
+
# First public revision - Jonathan Grenier (jgrenier@techniconseils.ca)
|
23
|
+
#
|
24
|
+
|
25
|
+
class String
  # The extended characters map used by removeaccents. The accented characters
  # are listed by Unicode code point (identical to ISO-8859-1 below 256) to
  # sidestep source-encoding issues. Each key is the plain replacement for the
  # code points in its value.
  ACCENTS_MAPPING = {
    'E' => [200,201,202,203],
    'e' => [232,233,234,235],
    'A' => [192,193,194,195,196,197],
    'a' => [224,225,226,227,228,229],
    'C' => [199],
    'c' => [231],
    'O' => [210,211,212,213,214,216],
    'o' => [242,243,244,245,246,248],
    'I' => [204,205,206,207],
    'i' => [236,237,238,239],
    'U' => [217,218,219,220],
    'u' => [249,250,251,252],
    'N' => [209],
    'n' => [241],
    'Y' => [221],
    'y' => [253,255],
    'AE' => [198],  # U+00C6
    'ae' => [230],  # U+00E6
    'OE' => [338],  # U+0152
    'oe' => [339]   # U+0153
  }

  # Remove the accents from the string. Uses String::ACCENTS_MAPPING as the source map.
  #
  # Returns a new String; the receiver is left untouched.
  def removeaccents
    str = String.new(self)
    # Rubies without String#encoding (1.8) need the explicit 'U' kcode flag to
    # treat the pattern as UTF-8; newer Rubies take the encoding from the
    # pattern string itself and no longer accept a third argument.
    legacy_ruby = !str.respond_to?(:encoding)

    String::ACCENTS_MAPPING.each do |letter, accents|
      packed = accents.pack('U*')
      rxp = legacy_ruby ? Regexp.new("[#{packed}]", nil, 'U') : Regexp.new("[#{packed}]")
      str.gsub!(rxp, letter)
    end

    str
  end

  # Convert a string to a format suitable for a URL without ever using escaped characters.
  # It calls strip, removeaccents, downcase (optional) then converts the spaces (optional)
  # and finally removes any characters matching the regexp (/[^-_A-Za-z0-9]/ by default).
  #
  # Options
  #
  # * :downcase => call downcase on the string (defaults to true)
  # * :convert_spaces => Convert space to underscore (defaults to false)
  # * :regexp => The regexp matching characters that will be converted to an empty string (defaults to /[^-_A-Za-z0-9]/)
  #
  # The options hash is only read, never modified.
  def urlize(options = {})
    # Only a missing/nil :downcase falls back to true, so an explicit false is honoured.
    downcase       = options[:downcase].nil? ? true : options[:downcase]
    convert_spaces = options[:convert_spaces] || false
    regexp         = options[:regexp] || /[^-_A-Za-z0-9]/

    str = self.strip.removeaccents
    str = str.downcase if downcase
    str = str.gsub(/\ /, '_') if convert_spaces
    str.gsub(regexp, '')
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require "#{SKYNET_ROOT}/lib/skynet-core"
|
2
|
+
require "#{SKYNET_ROOT}/lib/skynet-core/db/ibrands"
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
module Skynet
  extend self

  # Base class for harvesters: picks a tracking through Cyberdyne, loads the
  # brand's mining terms and yields every (mining term, search term) pair to
  # the caller's block.
  class Harvest
    attr_reader :buffer, :buffer_mining_terms,
                :cost, :results
    attr_accessor :logger

    # network_type - passed on to Cyberdyne.new for every run.
    def initialize(network_type)
      @network_type = network_type
      @logger = {}
    end

    # Processes the first available tracking, if there is one.
    # The block receives (mining_term, search_term).
    def base_process(&block)
      @buffer = Cyberdyne.new @network_type
      if @buffer.have_track_networks?
        internal_process(&block)
      end
    end

    # Processes the tracking with the given id, if Cyberdyne finds it.
    # The block receives (mining_term, search_term).
    def base_specific_track_process(track_id, &block)
      @buffer = Cyberdyne.new @network_type
      if @buffer.specific_track(track_id)
        internal_process(&block)
      end
    end

    # The tracking cost is the number of mining terms multiplied by the total
    # number of search terms (synonyms) across all of them.
    # Stores the result in @cost and returns it.
    def cost_calculation
      minings = @buffer_mining_terms.size
      search_term = @buffer_mining_terms.inject(0) { |total, t| total + t.search_terms.size }
      @cost = minings * search_term
    end

    # Builds a case-insensitive alternation pattern out of the given terms.
    #
    # terms         - list of term Strings.
    # boundarie     - text placed in front of every term (e.g. a word boundary).
    # remove_accent - when true, String#removeaccents is applied to each term.
    #
    # Returns a String such as "(?i)(foo|bar)"; "(?i)()" for an empty list.
    def generate_regex(terms, boundarie = "", remove_accent = false)
      alternatives = terms.map do |t|
        t = t.removeaccents if remove_accent
        # NOTE(review): as written, "?" is only removed when it directly
        # follows "|" - confirm whether a standalone "?" should be stripped too.
        "#{boundarie}#{t.gsub(/\+|\-|\/|\|\?|\*|\\|\|/,'')}"
      end

      # Joining directly (instead of going through a placeholder character)
      # keeps terms intact whatever characters they contain.
      "(?i)(#{alternatives.join('|')})"
    end

    # To be overridden by concrete harvesters.
    def find
      raise "Abstract method called!"
    end

    protected

    # Locks the tracking, yields every non-empty search term of every mining
    # term (resetting @results before each yield) and unlocks the tracking.
    # Returns the result of the unlock call.
    # NOTE(review): if the block raises, the tracking is not unlocked here -
    # confirm that this is handled by the caller.
    def internal_process
      @buffer.lock_first_track_network
      load_mining_terms
      cost_calculation
      @buffer_mining_terms.each do |mining_term|
        mining_term.search_terms.each do |search_term|
          next if search_term.nil? || search_term.empty?

          @results = []
          yield mining_term, search_term
        end
      end
      @buffer.unlock_first_track_network
    end

    # Mining terms of the current tracking's brand, memoized for this instance.
    def load_mining_terms
      @buffer_mining_terms ||= @buffer.track_network_handler.brand.mining_terms
    end

    # Language of the current tracking's brand, memoized for this instance.
    def load_language
      @buffer_language ||= @buffer.track_network_handler.brand.language
    end

  end #class

end #module
|
data/lib/skynet-core.rb
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "yaml"
|
3
|
+
|
4
|
+
SKYNET_ROOT = File.expand_path("#{File.dirname(__FILE__)}/..") unless defined?(SKYNET_ROOT)
|
5
|
+
|
6
|
+
Dir[File.dirname(__FILE__) + '/skynet-core/*.rb'].each {|file| require file }
|
7
|
+
|
8
|
+
# Defines our SKYNET_ENV
|
9
|
+
SKYNET_ENV = ENV["SKYNET_ENV"] ||= ENV["RACK_ENV"] ||= "development" unless defined?(SKYNET_ENV)
|
10
|
+
#
|
11
|
+
module Skynet

  attr_reader :_app_config, :_database_config

  class ApplicationLoadError < RuntimeError #:nodoc:
  end

  class << self

    # Current environment as a lowercase Symbol, taken from SKYNET_ENV.
    def env
      @_env ||= SKYNET_ENV.to_s.downcase.to_sym
    end

    # Absolute path built from SKYNET_ROOT and the given path segments.
    def root(*args)
      File.expand_path(File.join(SKYNET_ROOT, *args))
    end

    # Default encoding to UTF8 if it has not already been set to something else.
    # Only relevant before Ruby 1.9; always returns nil.
    def set_encoding
      unless RUBY_VERSION >= '1.9'
        # Plain Ruby checks, so this works without ActiveSupport loaded.
        $KCODE = 'U' if $KCODE == 'NONE' || $KCODE.nil? || $KCODE.to_s.strip.empty?
      end
      nil
    end

    #
    # def bundle
    #   return :locked if File.exist?(root('.bundle/environment.rb'))
    #   return :unlocked if File.exist?(root("Gemfile"))
    # end

    # Loads config/settings.yml and config/database.yml (relative to root).
    #
    # Returns true when both were loaded.
    # Raises RuntimeError ("Config files not found") when either is missing.
    def load_config
      app_config
      database_config

      raise "Config files not found" if @_app_config.nil? || @_database_config.nil?
      !@_app_config.nil?
    end

    # Parsed config/settings.yml, or nil when the file does not exist.
    def app_config
      @_app_config ||= set_app_config
    end

    # Parsed config/database.yml, or nil when the file does not exist.
    def database_config
      @_database_config ||= set_database_config
    end

    # Requires every file matching the given glob patterns.
    #
    # Files that fail to load are retried after the others, so files may
    # depend on each other regardless of the order in which they are listed.
    #
    # Raises the last error seen once a full pass loads nothing new.
    def require_dependencies(*paths)
      # Extract all files to load
      files = paths.map { |path| Dir[path] }.flatten

      until files.empty?
        # We need a size to make sure things are loading
        size_at_start = files.size
        errors = []

        # Iterate over a copy: loaded files are removed from +files+ as we go.
        files.dup.each do |file|
          begin
            require file
            files.delete(file)
          rescue StandardError, ScriptError => e
            # ScriptError covers LoadError and SyntaxError; signals and exit
            # requests are deliberately not intercepted.
            errors << e
          end
        end

        # Stop processing if nothing loaded during this pass
        raise errors.last if files.size == size_at_start
      end
    end
    alias :require_dependency :require_dependencies

    ##
    # Loads (with Kernel#load, so files are re-evaluated on every call) all
    # files matching the given glob patterns.
    #
    def load_dependencies(*paths)
      paths.each do |path|
        Dir[path].each { |file| load(file) }
      end
    end
    alias :load_dependency :load_dependencies


    private

    def set_app_config
      read_yaml("config/settings.yml")
    end

    def set_database_config
      read_yaml("config/database.yml")
    end

    # Parses the YAML file at the given path relative to root.
    # Returns nil when the file does not exist. The block form of File.open
    # closes the file handle once parsing is done.
    def read_yaml(relative_path)
      path = root(relative_path)
      File.open(path) { |io| YAML::load(io) } if File.exist?(path)
    end

  end # self
end
|
data/skynet-core.gemspec
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{skynet-core}
|
5
|
+
s.version = "0.0.3"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Henry Hamon"]
|
9
|
+
s.date = %q{2010-06-09}
|
10
|
+
s.description = %q{Datamining Bot}
|
11
|
+
s.email = %q{henry.hamon@a2c.com.br}
|
12
|
+
s.extra_rdoc_files = ["README.mkdn", "lib/skynet-core.rb", "lib/skynet-core/cyberdyne.rb", "lib/skynet-core/db/hippocampus.rb", "lib/skynet-core/db/ibrands.rb", "lib/skynet-core/db/migrate/001_create_actives_and_logs.rb", "lib/skynet-core/db/migrate/002_pimp_log.rb", "lib/skynet-core/db/migration.rb", "lib/skynet-core/extend_string.rb", "lib/skynet-core/harvest.rb"]
|
13
|
+
s.files = ["Manifest", "README.mkdn", "Rakefile", "config/database.yml.example", "config/settings.yml", "config/settings.yml.example", "lib/skynet-core.rb", "lib/skynet-core/cyberdyne.rb", "lib/skynet-core/db/hippocampus.rb", "lib/skynet-core/db/ibrands.rb", "lib/skynet-core/db/migrate/001_create_actives_and_logs.rb", "lib/skynet-core/db/migrate/002_pimp_log.rb", "lib/skynet-core/db/migration.rb", "lib/skynet-core/extend_string.rb", "lib/skynet-core/harvest.rb", "skynet-core.gemspec", "spec/cyberdyne_spec.rb", "spec/harvest_spec.rb", "spec/skynet-core_spec.rb"]
|
14
|
+
s.homepage = %q{http://github.com/a2c/skynet-core}
|
15
|
+
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Skynet-core", "--main", "README.mkdn"]
|
16
|
+
s.require_paths = ["lib"]
|
17
|
+
s.rubyforge_project = %q{skynet-core}
|
18
|
+
s.rubygems_version = %q{1.3.7}
|
19
|
+
s.summary = %q{Datamining Bot}
|
20
|
+
|
21
|
+
if s.respond_to? :specification_version then
|
22
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
23
|
+
s.specification_version = 3
|
24
|
+
|
25
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
26
|
+
s.add_runtime_dependency(%q<json>, [">= 0"])
|
27
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
28
|
+
s.add_runtime_dependency(%q<mechanize>, [">= 0"])
|
29
|
+
s.add_runtime_dependency(%q<activerecord>, [">= 0"])
|
30
|
+
s.add_runtime_dependency(%q<simpleconsole>, [">= 0"])
|
31
|
+
s.add_development_dependency(%q<rspec>, [">= 0"])
|
32
|
+
else
|
33
|
+
s.add_dependency(%q<json>, [">= 0"])
|
34
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
35
|
+
s.add_dependency(%q<mechanize>, [">= 0"])
|
36
|
+
s.add_dependency(%q<activerecord>, [">= 0"])
|
37
|
+
s.add_dependency(%q<simpleconsole>, [">= 0"])
|
38
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
39
|
+
end
|
40
|
+
else
|
41
|
+
s.add_dependency(%q<json>, [">= 0"])
|
42
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
43
|
+
s.add_dependency(%q<mechanize>, [">= 0"])
|
44
|
+
s.add_dependency(%q<activerecord>, [">= 0"])
|
45
|
+
s.add_dependency(%q<simpleconsole>, [">= 0"])
|
46
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'spec'
|
3
|
+
|
4
|
+
require File.dirname(__FILE__) + '/../lib/skynet-core'
|
5
|
+
|
6
|
+
describe "cyberdyne" do
|
7
|
+
before(:each) do
|
8
|
+
@cyberdyne = Skynet::Cyberdyne.new 7
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should have track" do
|
12
|
+
@cyberdyne.have_track_networks?.should be_true
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should have lock the first track" do
|
16
|
+
@cyberdyne.lock_first_track_network.should be_true
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should have unlock the track" do
|
20
|
+
@cyberdyne.unlock_first_track_network.should be_true
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should load the track" do
|
24
|
+
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'spec'
|
3
|
+
|
4
|
+
require File.dirname(__FILE__) + '/../lib/skynet-core'
|
5
|
+
|
6
|
+
describe "skynet-core" do
|
7
|
+
|
8
|
+
# it "check some global methods" do
|
9
|
+
# should_validate_presence_of Skynet, :root
|
10
|
+
# end
|
11
|
+
|
12
|
+
# it "should not find config files" do
|
13
|
+
|
14
|
+
# if File.exist?(File.dirname(__FILE__) + '/../config/settings.xml')
|
15
|
+
# system "mv #{File.dirname(__FILE__) + '/../config/settings.xml'} #{File.dirname(__FILE__) + '/../config/settings.xml.example'}"
|
16
|
+
# end
|
17
|
+
|
18
|
+
# lambda{Skynet.load_config}.should raise_error(RuntimeError, "Config files not found")
|
19
|
+
|
20
|
+
# end
|
21
|
+
|
22
|
+
it "should find config files" do
|
23
|
+
|
24
|
+
if File.exist?(File.dirname(__FILE__) + '/../config/settings.xml.example')
|
25
|
+
system "mv #{File.dirname(__FILE__) + '/../config/settings.xml.example'} #{File.dirname(__FILE__) + '/../config/settings.xml'}"
|
26
|
+
end
|
27
|
+
|
28
|
+
Skynet.load_config.should be_true
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
metadata
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: skynet-core
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 25
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Henry Hamon
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-06-09 00:00:00 -03:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: json
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: nokogiri
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :runtime
|
48
|
+
version_requirements: *id002
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: mechanize
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
type: :runtime
|
62
|
+
version_requirements: *id003
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: activerecord
|
65
|
+
prerelease: false
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 3
|
72
|
+
segments:
|
73
|
+
- 0
|
74
|
+
version: "0"
|
75
|
+
type: :runtime
|
76
|
+
version_requirements: *id004
|
77
|
+
- !ruby/object:Gem::Dependency
|
78
|
+
name: simpleconsole
|
79
|
+
prerelease: false
|
80
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
hash: 3
|
86
|
+
segments:
|
87
|
+
- 0
|
88
|
+
version: "0"
|
89
|
+
type: :runtime
|
90
|
+
version_requirements: *id005
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: rspec
|
93
|
+
prerelease: false
|
94
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
hash: 3
|
100
|
+
segments:
|
101
|
+
- 0
|
102
|
+
version: "0"
|
103
|
+
type: :development
|
104
|
+
version_requirements: *id006
|
105
|
+
description: Datamining Bot
|
106
|
+
email: henry.hamon@a2c.com.br
|
107
|
+
executables: []
|
108
|
+
|
109
|
+
extensions: []
|
110
|
+
|
111
|
+
extra_rdoc_files:
|
112
|
+
- README.mkdn
|
113
|
+
- lib/skynet-core.rb
|
114
|
+
- lib/skynet-core/cyberdyne.rb
|
115
|
+
- lib/skynet-core/db/hippocampus.rb
|
116
|
+
- lib/skynet-core/db/ibrands.rb
|
117
|
+
- lib/skynet-core/db/migrate/001_create_actives_and_logs.rb
|
118
|
+
- lib/skynet-core/db/migrate/002_pimp_log.rb
|
119
|
+
- lib/skynet-core/db/migration.rb
|
120
|
+
- lib/skynet-core/extend_string.rb
|
121
|
+
- lib/skynet-core/harvest.rb
|
122
|
+
files:
|
123
|
+
- Manifest
|
124
|
+
- README.mkdn
|
125
|
+
- Rakefile
|
126
|
+
- config/database.yml.example
|
127
|
+
- config/settings.yml
|
128
|
+
- config/settings.yml.example
|
129
|
+
- lib/skynet-core.rb
|
130
|
+
- lib/skynet-core/cyberdyne.rb
|
131
|
+
- lib/skynet-core/db/hippocampus.rb
|
132
|
+
- lib/skynet-core/db/ibrands.rb
|
133
|
+
- lib/skynet-core/db/migrate/001_create_actives_and_logs.rb
|
134
|
+
- lib/skynet-core/db/migrate/002_pimp_log.rb
|
135
|
+
- lib/skynet-core/db/migration.rb
|
136
|
+
- lib/skynet-core/extend_string.rb
|
137
|
+
- lib/skynet-core/harvest.rb
|
138
|
+
- skynet-core.gemspec
|
139
|
+
- spec/cyberdyne_spec.rb
|
140
|
+
- spec/harvest_spec.rb
|
141
|
+
- spec/skynet-core_spec.rb
|
142
|
+
has_rdoc: true
|
143
|
+
homepage: http://github.com/a2c/skynet-core
|
144
|
+
licenses: []
|
145
|
+
|
146
|
+
post_install_message:
|
147
|
+
rdoc_options:
|
148
|
+
- --line-numbers
|
149
|
+
- --inline-source
|
150
|
+
- --title
|
151
|
+
- Skynet-core
|
152
|
+
- --main
|
153
|
+
- README.mkdn
|
154
|
+
require_paths:
|
155
|
+
- lib
|
156
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
157
|
+
none: false
|
158
|
+
requirements:
|
159
|
+
- - ">="
|
160
|
+
- !ruby/object:Gem::Version
|
161
|
+
hash: 3
|
162
|
+
segments:
|
163
|
+
- 0
|
164
|
+
version: "0"
|
165
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
166
|
+
none: false
|
167
|
+
requirements:
|
168
|
+
- - ">="
|
169
|
+
- !ruby/object:Gem::Version
|
170
|
+
hash: 11
|
171
|
+
segments:
|
172
|
+
- 1
|
173
|
+
- 2
|
174
|
+
version: "1.2"
|
175
|
+
requirements: []
|
176
|
+
|
177
|
+
rubyforge_project: skynet-core
|
178
|
+
rubygems_version: 1.3.7
|
179
|
+
signing_key:
|
180
|
+
specification_version: 3
|
181
|
+
summary: Datamining Bot
|
182
|
+
test_files: []
|
183
|
+
|