sentra 0.2.2 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/config.ru +1 -0
- data/bin/config.yml +1 -0
- data/lib/sentra/opr.rb +48 -0
- data/lib/sentra/version.rb +1 -1
- data/lib/sentra.rb +50 -69
- data/tools/multithread_batch.rb +60 -0
- data/tools/multithread_check.rb +3 -3
- data/tools/singlethread_check.rb +1 -1
- data/tools/tools_helper.rb +8 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53eebda466208eb48275bd61128748e7ca7f2ff6
|
4
|
+
data.tar.gz: 5ad2fe6740404c2667add696685efcb306d7a2e4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a98e98d92cbc68ed9ccb509e7be8d81023bbbedb89f34269def7457f25255c49a9f4a034ae20a2cfbe0e8031013988244d000f66ba864f1232ef7c7e4c26ff02
|
7
|
+
data.tar.gz: 275514cc4bb7a06c80fdffa984ff5783d0c8093063f10d06c9d4e676353a340c7cb57908ff586f72f002b193fbf7f1aa3bca5e9336daaae877a22cf86c63bcad
|
data/bin/config.ru
CHANGED
data/bin/config.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
parallelism: 4
|
data/lib/sentra/opr.rb
CHANGED
@@ -34,6 +34,22 @@ module SemanticService
|
|
34
34
|
|
35
35
|
end
|
36
36
|
|
37
|
+
# JRuby wrapper module for the Java package crriskdata.opr.nlp.storm.
# NOTE(review): no use of Storm::<Class> is visible in this diff; presumably
# the package's classes are meant to be referenced through this module.
module Storm
  include_package('crriskdata.opr.nlp.storm')
  import('crriskdata.opr.nlp.storm')
end
|
41
|
+
|
42
|
+
# JRuby wrapper module for the Java package crriskdata.opr.nlp.storm.drpc.
# sentra.rb reaches ArticlesMapSpout and ArticlesMapBolt through this module.
module StormDRPC
  include_package('crriskdata.opr.nlp.storm.drpc')
  import('crriskdata.opr.nlp.storm.drpc')
end
|
46
|
+
|
47
|
+
# JRuby wrapper module for the Java package crriskdata.opr.nlp.storm.rabbitmq.
# sentra.rb reaches STAGES and ServiceSemanticToplogy through this module.
module StormDrpcTopology
  include_package('crriskdata.opr.nlp.storm.rabbitmq')
  import('crriskdata.opr.nlp.storm.rabbitmq')
end
|
51
|
+
|
52
|
+
|
37
53
|
module ArticleTools
|
38
54
|
class ArticleSerializer
|
39
55
|
def initialize
|
@@ -54,4 +70,36 @@ module ArticleTools
|
|
54
70
|
@simpleFormat.parse(input)
|
55
71
|
end
|
56
72
|
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Builds an OprData::Article from raw text and its metadata.
#
# content - value passed to Article#setContent
# url     - string wrapped in Net::URL and passed to Article#setUrl
# title   - value passed to Article#setTitle
# domain  - value passed to Article#setChannel (defaults to 'GENERAL')
#
# The article date is taken from a freshly obtained Util::Calendar instance.
# Returns the populated article.
def prepare_article(content, url, title, domain = 'GENERAL')
  OprData::Article.new.tap do |article|
    created_at = Util::Calendar.getInstance
    article.setUrl(Net::URL.new(url))
    article.setDate(created_at.getTime)
    article.setTitle(title)
    article.setChannel(domain)
    article.setContent(content)
  end
end
|
86
|
+
|
87
|
+
|
88
|
+
# Converts sentiment counters into a hash of sentiment ratios.
#
# statistics - indexable collection whose entries respond to +getValue+.
#              As read by this method: index 0 feeds 'negative', index 1
#              'positive', index 2 'neutral', and index 3 is the divisor.
#              NOTE(review): that index 3 is the overall total is inferred
#              from its use as the divisor; confirm against the producer.
#
# Returns a Hash with the keys 'negative', 'positive', 'neutral' (each value
# divided by the index-3 value) and an always-empty 'details' Hash. When the
# divisor is not positive, or +statistics+ is nil or has no entry at index 3,
# the result is fully neutral (negative 0, positive 0, neutral 1) instead of
# raising NoMethodError.
def process_result(statistics)
  # Read and convert the divisor once; a missing entry counts as "no data".
  total_entry = statistics && statistics[3]
  total = total_entry ? total_entry.getValue.to_f : 0.0

  response =
    if total > 0
      {
        'negative' => statistics[0].getValue.to_f / total,
        'positive' => statistics[1].getValue.to_f / total,
        'neutral' => statistics[2].getValue.to_f / total
      }
    else
      { 'negative' => 0, 'positive' => 0, 'neutral' => 1 }
    end

  response['details'] = {}
  response
end
|
data/lib/sentra/version.rb
CHANGED
data/lib/sentra.rb
CHANGED
@@ -7,6 +7,7 @@ require 'sentra/opr'
|
|
7
7
|
require 'sentra/rest_client'
|
8
8
|
require "sentra/sents"
|
9
9
|
require 'securerandom'
|
10
|
+
require 'yaml'
|
10
11
|
|
11
12
|
|
12
13
|
module Sentra
|
@@ -15,8 +16,37 @@ module Sentra
|
|
15
16
|
puts "Hello, gem "
|
16
17
|
class SentraService < Sinatra::Base
|
17
18
|
configure do
  # Topology settings are read from config.yml in the process working
  # directory. A missing file, an empty/non-mapping file, or a file without
  # a 'parallelism' key all fall back to the default parallelism.
  default_parallelism = 4
  config_path = File.join(Dir.pwd, 'config.yml')
  p config_path

  topology_settings = {}
  if File.exist?(config_path)
    loaded = YAML.load_file(config_path)
    # YAML.load_file yields a non-Hash for an empty or scalar document.
    topology_settings = loaded if loaded.is_a?(Hash)
    puts 'Config file exists and read'
  else
    puts "Config file does not exist parallelism set to #{default_parallelism}"
  end
  topology_settings['parallelism'] ||= default_parallelism
  p topology_settings

  # Names of the pipeline stages handed to the topology factory.
  stage_names = java.util.HashSet.new
  [
    StormDrpcTopology::STAGES::NORMALIZE_STAGE,
    StormDrpcTopology::STAGES::SENTENCES_STAGE,
    StormDrpcTopology::STAGES::POSTAG_STAGE,
    StormDrpcTopology::STAGES::ENTITIES_STAGE,
    StormDrpcTopology::STAGES::SENTRA_STAGE,
    StormDrpcTopology::STAGES::TWITTER_STAGE
  ].each { |stage| stage_names.add(stage.name) }

  # The settings below store the spout/bolt classes themselves; the cluster
  # receives fresh instances of them.
  spout = StormDRPC::ArticlesMapSpout
  bolt = StormDRPC::ArticlesMapBolt
  cluster = StormDrpcTopology::ServiceSemanticToplogy.createLocalCluster(
    spout.new,
    bolt.new,
    stage_names,
    topology_settings['parallelism'].to_i
  )

  set :server, :trinidad
  set :bind, '0.0.0.0'
  set :spout, spout
  set :bolt, bolt
  set :cluster, cluster
end
|
21
51
|
|
22
52
|
|
@@ -30,39 +60,8 @@ module Sentra
|
|
30
60
|
end
|
31
61
|
|
32
62
|
|
33
|
-
post '/sentra/analyzer/atomic' do
|
34
|
-
query = request.body.read.to_s
|
35
|
-
begin
|
36
|
-
query = JSON.parse(query)
|
37
|
-
rescue Exception => e
|
38
|
-
puts e.message
|
39
|
-
query = {}
|
40
|
-
query['content'] = nil
|
41
|
-
end
|
42
|
-
if params['key'] and params['key'] =="QD3F7Yr2u098RfaB" and query['content']
|
43
|
-
processing_client = Sentra::ProcessingClient.new
|
44
|
-
processing_client.prepare_article query['content'],
|
45
|
-
"http://" +SecureRandom.urlsafe_base64 + ".com/" + SecureRandom.urlsafe_base64,
|
46
|
-
"Test1"
|
47
|
-
processing_client.send
|
48
|
-
response = processing_client.receive
|
49
|
-
begin
|
50
|
-
processing_client.process_result(response).to_json
|
51
|
-
rescue Exception => e
|
52
|
-
puts e.message
|
53
|
-
response = {'error' => e.message}
|
54
|
-
response.to_json
|
55
|
-
end
|
56
|
-
|
57
|
-
elsif params['key'] and params['key'] =="QD3F7Yr2u098RfaB" and not query['content']
|
58
|
-
return "There is no text to process"
|
59
|
-
else
|
60
|
-
return "Not authorized"
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
63
|
post '/sentra/analyzer/batch' do
|
64
|
+
|
66
65
|
query = request.body.read.to_s
|
67
66
|
begin
|
68
67
|
query = JSON.parse(query)
|
@@ -70,44 +69,26 @@ module Sentra
|
|
70
69
|
puts 'Error with JSON'
|
71
70
|
query = nil
|
72
71
|
end
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
query.each_with_index do |tweet, index|
|
78
|
-
begin
|
79
|
-
processing_client = Sentra::ProcessingClient.new
|
80
|
-
|
81
|
-
processing_client.prepare_article tweet['content'],
|
82
|
-
"http://" + SecureRandom.urlsafe_base64 + ".com/" + SecureRandom.urlsafe_base64,
|
83
|
-
"Test" + index.to_s
|
84
|
-
processing_client.send
|
85
|
-
clients.push processing_client
|
86
|
-
|
87
|
-
rescue
|
88
|
-
puts 'Errors with getting result from socket or queue'
|
89
|
-
end
|
90
|
-
end
|
91
|
-
clients.each do |client|
|
92
|
-
begin
|
93
|
-
|
94
|
-
response = client.receive
|
95
|
-
p response
|
96
|
-
responses.push(client.process_result(response))
|
97
|
-
rescue
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
begin
|
102
|
-
responses.to_json
|
103
|
-
rescue Exception => e
|
104
|
-
puts 'Errors with converting result to json'
|
105
|
-
end
|
106
|
-
elsif params['key'] and params['key'] =="QD3F7Yr2u098RfaB" and not query and not query['content']
|
107
|
-
return "There is no text to process"
|
108
|
-
else
|
109
|
-
return "Not authorized"
|
72
|
+
|
73
|
+
list = java.util.ArrayList.new
|
74
|
+
query.each_with_index do |tweet, index|
|
75
|
+
list.add(tweet['content'])
|
110
76
|
end
|
77
|
+
|
78
|
+
responses = []
|
79
|
+
|
80
|
+
key = SecureRandom.urlsafe_base64
|
81
|
+
settings.spout.putArticle(key, list);
|
82
|
+
article_cl = settings.bolt.getArticle(key)
|
83
|
+
|
84
|
+
# puts "next"
|
85
|
+
# p article_cl
|
86
|
+
article_cl.each { |a|
|
87
|
+
facts = a.getMetadata('twitter').getFacts()
|
88
|
+
responses.push(process_result(facts))
|
89
|
+
}
|
90
|
+
|
91
|
+
responses.to_json
|
111
92
|
end
|
112
93
|
|
113
94
|
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'tools_helper'
|
2
|
+
|
3
|
+
# Load-test task: builds one batch of randomly picked queries from
# auxillary/testdata.csv and posts it to the /sentra/analyzer/batch endpoint.
class CheckBatch
  # id   - identifier printed when the task starts
  # host - "host:port" of the service (defaults to 'localhost:4567')
  def initialize(id, host = 'localhost:4567')
    @id = id
    @queries = CSV_MODULE::MyCSV.new('auxillary/testdata.csv').get_queries
    @iterations = 100
    @url = "http://#{host}/sentra/analyzer/batch"
  end

  # Posts a batch of @iterations queries, prints the parsed JSON response
  # and returns it.
  def call
    puts "STARTED THREAD NUMBER: #{@id}"

    r = Random.new

    # Each entry copies the 'content' of a query picked at an index in 0..490.
    batch_query = Array.new(@iterations) do
      { 'content' => @queries[r.rand(0..490)]['content'] }
    end

    response = RestClient2.post(@url, batch_query.to_json)

    p JSON.parse(response.body)
  end
end
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
# Driver: runs num_threads CheckBatch tasks on a thread pool and waits for
# all of them to finish.
num_threads = 1
host = '10.1.2.230:4567'

executor = ThreadPoolExecutor.new(num_threads,      # core pool size
                                  num_threads + 10, # maximum pool size
                                  60,               # keep-alive time
                                  TimeUnit::SECONDS,
                                  LinkedBlockingQueue.new)

begin
  tasks = Array.new(num_threads) do |i|
    # Pass the configured host explicitly; without it CheckBatch falls back
    # to its 'localhost:4567' default and the host above is never used.
    task = FutureTask.new(CheckBatch.new(i, host))
    executor.execute(task)
    task
  end

  # FutureTask#get blocks until the task completes.
  tasks.each { |task| task.get }
ensure
  # Shut the pool down even when a task raised.
  executor.shutdown
end
|
data/tools/multithread_check.rb
CHANGED
@@ -35,7 +35,7 @@ class CheckLoad
|
|
35
35
|
q['content'] = @queries[r.rand(0..490)]['content']
|
36
36
|
puts "QUERY TO SEND: '" + q['content'] + "'"
|
37
37
|
|
38
|
-
url = "http://
|
38
|
+
url = "http://10.1.2.230:4567/sentra/analyzer/atomic?key=QD3F7Yr2u098RfaB"
|
39
39
|
response = RestClient2.post(url, q.to_json)
|
40
40
|
|
41
41
|
json = JSON.parse(response.body)
|
@@ -46,8 +46,8 @@ class CheckLoad
|
|
46
46
|
end
|
47
47
|
|
48
48
|
|
49
|
-
executor = ThreadPoolExecutor.new(
|
50
|
-
|
49
|
+
executor = ThreadPoolExecutor.new(20, # core_pool_treads
|
50
|
+
30, # max_pool_threads
|
51
51
|
60, # keep_alive_time
|
52
52
|
TimeUnit::SECONDS,
|
53
53
|
LinkedBlockingQueue.new)
|
data/tools/singlethread_check.rb
CHANGED
@@ -9,7 +9,7 @@ q = {}
|
|
9
9
|
q['content'] = 'Hans Morris makes excellent point about P2P risks should be.'
|
10
10
|
puts "QUERY TO SEND: '" + q['content'] + "'"
|
11
11
|
|
12
|
-
url = "http://
|
12
|
+
url = "http://10.1.2.230:4567/sentra/analyzer/atomic?key=QD3F7Yr2u098RfaB"
|
13
13
|
response = RestClient2.post(url, q.to_json)
|
14
14
|
|
15
15
|
json = JSON.parse(response.body)
|
@@ -0,0 +1,8 @@
|
|
1
|
+
java_import 'java.util.concurrent.Callable'
|
2
|
+
java_import 'java.util.concurrent.FutureTask'
|
3
|
+
java_import 'java.util.concurrent.LinkedBlockingQueue'
|
4
|
+
java_import 'java.util.concurrent.ThreadPoolExecutor'
|
5
|
+
java_import 'java.util.concurrent.TimeUnit'
|
6
|
+
|
7
|
+
require 'sentra/rest_client'
|
8
|
+
require "sentra/sents"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sentra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kshakirov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05-
|
11
|
+
date: 2015-05-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -138,6 +138,7 @@ files:
|
|
138
138
|
- README_INSTALL
|
139
139
|
- Rakefile
|
140
140
|
- bin/config.ru
|
141
|
+
- bin/config.yml
|
141
142
|
- bin/console
|
142
143
|
- bin/setup
|
143
144
|
- lib/sentra.rb
|
@@ -150,8 +151,10 @@ files:
|
|
150
151
|
- tools/auxillary/tweets.csv
|
151
152
|
- tools/calculate_sents_diff.rb
|
152
153
|
- tools/compare_sentiments.rb
|
154
|
+
- tools/multithread_batch.rb
|
153
155
|
- tools/multithread_check.rb
|
154
156
|
- tools/singlethread_check.rb
|
157
|
+
- tools/tools_helper.rb
|
155
158
|
homepage: https://rubygems.org
|
156
159
|
licenses:
|
157
160
|
- MIT
|