sentra 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/config.ru +1 -0
- data/bin/config.yml +1 -0
- data/lib/sentra/opr.rb +48 -0
- data/lib/sentra/version.rb +1 -1
- data/lib/sentra.rb +50 -69
- data/tools/multithread_batch.rb +60 -0
- data/tools/multithread_check.rb +3 -3
- data/tools/singlethread_check.rb +1 -1
- data/tools/tools_helper.rb +8 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53eebda466208eb48275bd61128748e7ca7f2ff6
|
4
|
+
data.tar.gz: 5ad2fe6740404c2667add696685efcb306d7a2e4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a98e98d92cbc68ed9ccb509e7be8d81023bbbedb89f34269def7457f25255c49a9f4a034ae20a2cfbe0e8031013988244d000f66ba864f1232ef7c7e4c26ff02
|
7
|
+
data.tar.gz: 275514cc4bb7a06c80fdffa984ff5783d0c8093063f10d06c9d4e676353a340c7cb57908ff586f72f002b193fbf7f1aa3bca5e9336daaae877a22cf86c63bcad
|
data/bin/config.ru
CHANGED
data/bin/config.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
parallelism: 4
|
data/lib/sentra/opr.rb
CHANGED
@@ -34,6 +34,22 @@ module SemanticService
|
|
34
34
|
|
35
35
|
end
|
36
36
|
|
37
|
+
module Storm
|
38
|
+
include_package "crriskdata.opr.nlp.storm"
|
39
|
+
import "crriskdata.opr.nlp.storm"
|
40
|
+
end
|
41
|
+
|
42
|
+
module StormDRPC
|
43
|
+
include_package "crriskdata.opr.nlp.storm.drpc"
|
44
|
+
import "crriskdata.opr.nlp.storm.drpc"
|
45
|
+
end
|
46
|
+
|
47
|
+
module StormDrpcTopology
|
48
|
+
include_package "crriskdata.opr.nlp.storm.rabbitmq"
|
49
|
+
import "crriskdata.opr.nlp.storm.rabbitmq"
|
50
|
+
end
|
51
|
+
|
52
|
+
|
37
53
|
module ArticleTools
|
38
54
|
class ArticleSerializer
|
39
55
|
def initialize
|
@@ -54,4 +70,36 @@ module ArticleTools
|
|
54
70
|
@simpleFormat.parse(input)
|
55
71
|
end
|
56
72
|
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def prepare_article content, url, title, domain='GENERAL'
|
76
|
+
tweet = OprData::Article.new
|
77
|
+
|
78
|
+
calendar = Util::Calendar.getInstance();
|
79
|
+
tweet.setUrl(Net::URL.new(url));
|
80
|
+
tweet.setDate(calendar.getTime)
|
81
|
+
tweet.setTitle(title)
|
82
|
+
tweet.setChannel(domain)
|
83
|
+
tweet.setContent(content)
|
84
|
+
tweet
|
85
|
+
end
|
86
|
+
|
87
|
+
|
88
|
+
def process_result statistics
|
89
|
+
response = {}
|
90
|
+
# puts 'here statistics'
|
91
|
+
# p statistics
|
92
|
+
if statistics[3].getValue.to_f > 0
|
93
|
+
response['negative'] = statistics[0].getValue.to_f / statistics[3].getValue.to_f
|
94
|
+
response['positive'] = statistics[1].getValue.to_f / statistics[3].getValue.to_f
|
95
|
+
response['neutral'] = statistics[2].getValue.to_f / statistics[3].getValue.to_f
|
96
|
+
|
97
|
+
else
|
98
|
+
response['negative'] = 0
|
99
|
+
response['positive'] = 0
|
100
|
+
response['neutral'] = 1
|
101
|
+
|
102
|
+
end
|
103
|
+
response['details'] = {}
|
104
|
+
response
|
57
105
|
end
|
data/lib/sentra/version.rb
CHANGED
data/lib/sentra.rb
CHANGED
@@ -7,6 +7,7 @@ require 'sentra/opr'
|
|
7
7
|
require 'sentra/rest_client'
|
8
8
|
require "sentra/sents"
|
9
9
|
require 'securerandom'
|
10
|
+
require 'yaml'
|
10
11
|
|
11
12
|
|
12
13
|
module Sentra
|
@@ -15,8 +16,37 @@ module Sentra
|
|
15
16
|
puts "Hello, gem "
|
16
17
|
class SentraService < Sinatra::Base
|
17
18
|
configure do
|
19
|
+
|
20
|
+
config_path = Dir.pwd + "/config.yml"
|
21
|
+
p config_path
|
22
|
+
if File.exists? config_path
|
23
|
+
topolog_settings = YAML.load_file config_path
|
24
|
+
puts 'Config file exists and read'
|
25
|
+
else
|
26
|
+
topolog_settings = {}
|
27
|
+
topolog_settings['parallelism'] = 4
|
28
|
+
puts 'Config file does not exist parallelism set to 3'
|
29
|
+
end
|
30
|
+
p topolog_settings
|
31
|
+
|
32
|
+
s = java.util.HashSet.new
|
33
|
+
s.add(StormDrpcTopology::STAGES::NORMALIZE_STAGE.name());
|
34
|
+
s.add(StormDrpcTopology::STAGES::SENTENCES_STAGE.name());
|
35
|
+
s.add(StormDrpcTopology::STAGES::POSTAG_STAGE.name());
|
36
|
+
s.add(StormDrpcTopology::STAGES::ENTITIES_STAGE.name());
|
37
|
+
s.add(StormDrpcTopology::STAGES::SENTRA_STAGE.name());
|
38
|
+
s.add(StormDrpcTopology::STAGES::TWITTER_STAGE.name());
|
39
|
+
|
40
|
+
|
41
|
+
spout = StormDRPC::ArticlesMapSpout
|
42
|
+
bolt = StormDRPC::ArticlesMapBolt
|
43
|
+
cluster = StormDrpcTopology::ServiceSemanticToplogy.createLocalCluster(StormDRPC::ArticlesMapSpout.new,
|
44
|
+
StormDRPC::ArticlesMapBolt.new, s, topolog_settings['parallelism'].to_i)
|
18
45
|
set :server, :trinidad
|
19
46
|
set :bind, '0.0.0.0'
|
47
|
+
set :spout, spout
|
48
|
+
set :bolt, bolt
|
49
|
+
set :cluster, cluster
|
20
50
|
end
|
21
51
|
|
22
52
|
|
@@ -30,39 +60,8 @@ module Sentra
|
|
30
60
|
end
|
31
61
|
|
32
62
|
|
33
|
-
post '/sentra/analyzer/atomic' do
|
34
|
-
query = request.body.read.to_s
|
35
|
-
begin
|
36
|
-
query = JSON.parse(query)
|
37
|
-
rescue Exception => e
|
38
|
-
puts e.message
|
39
|
-
query = {}
|
40
|
-
query['content'] = nil
|
41
|
-
end
|
42
|
-
if params['key'] and params['key'] =="QD3F7Yr2u098RfaB" and query['content']
|
43
|
-
processing_client = Sentra::ProcessingClient.new
|
44
|
-
processing_client.prepare_article query['content'],
|
45
|
-
"http://" +SecureRandom.urlsafe_base64 + ".com/" + SecureRandom.urlsafe_base64,
|
46
|
-
"Test1"
|
47
|
-
processing_client.send
|
48
|
-
response = processing_client.receive
|
49
|
-
begin
|
50
|
-
processing_client.process_result(response).to_json
|
51
|
-
rescue Exception => e
|
52
|
-
puts e.message
|
53
|
-
response = {'error' => e.message}
|
54
|
-
response.to_json
|
55
|
-
end
|
56
|
-
|
57
|
-
elsif params['key'] and params['key'] =="QD3F7Yr2u098RfaB" and not query['content']
|
58
|
-
return "There is no text to process"
|
59
|
-
else
|
60
|
-
return "Not authorized"
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
63
|
post '/sentra/analyzer/batch' do
|
64
|
+
|
66
65
|
query = request.body.read.to_s
|
67
66
|
begin
|
68
67
|
query = JSON.parse(query)
|
@@ -70,44 +69,26 @@ module Sentra
|
|
70
69
|
puts 'Error with JSON'
|
71
70
|
query = nil
|
72
71
|
end
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
query.each_with_index do |tweet, index|
|
78
|
-
begin
|
79
|
-
processing_client = Sentra::ProcessingClient.new
|
80
|
-
|
81
|
-
processing_client.prepare_article tweet['content'],
|
82
|
-
"http://" + SecureRandom.urlsafe_base64 + ".com/" + SecureRandom.urlsafe_base64,
|
83
|
-
"Test" + index.to_s
|
84
|
-
processing_client.send
|
85
|
-
clients.push processing_client
|
86
|
-
|
87
|
-
rescue
|
88
|
-
puts 'Errors with getting result from socket or queue'
|
89
|
-
end
|
90
|
-
end
|
91
|
-
clients.each do |client|
|
92
|
-
begin
|
93
|
-
|
94
|
-
response = client.receive
|
95
|
-
p response
|
96
|
-
responses.push(client.process_result(response))
|
97
|
-
rescue
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
begin
|
102
|
-
responses.to_json
|
103
|
-
rescue Exception => e
|
104
|
-
puts 'Errors with converting result to json'
|
105
|
-
end
|
106
|
-
elsif params['key'] and params['key'] =="QD3F7Yr2u098RfaB" and not query and not query['content']
|
107
|
-
return "There is no text to process"
|
108
|
-
else
|
109
|
-
return "Not authorized"
|
72
|
+
|
73
|
+
list = java.util.ArrayList.new
|
74
|
+
query.each_with_index do |tweet, index|
|
75
|
+
list.add(tweet['content'])
|
110
76
|
end
|
77
|
+
|
78
|
+
responses = []
|
79
|
+
|
80
|
+
key = SecureRandom.urlsafe_base64
|
81
|
+
settings.spout.putArticle(key, list);
|
82
|
+
article_cl = settings.bolt.getArticle(key)
|
83
|
+
|
84
|
+
# puts "next"
|
85
|
+
# p article_cl
|
86
|
+
article_cl.each { |a|
|
87
|
+
facts = a.getMetadata('twitter').getFacts()
|
88
|
+
responses.push(process_result(facts))
|
89
|
+
}
|
90
|
+
|
91
|
+
responses.to_json
|
111
92
|
end
|
112
93
|
|
113
94
|
|
data/tools/multithread_batch.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'tools_helper'
|
2
|
+
|
3
|
+
class CheckBatch
|
4
|
+
def initialize id, host='localhost:4567'
|
5
|
+
@id=id
|
6
|
+
mycsv = CSV_MODULE::MyCSV.new('auxillary/testdata.csv')
|
7
|
+
@queries = mycsv.get_queries
|
8
|
+
@iterations = 100
|
9
|
+
@url = "http://#{host}/sentra/analyzer/batch"
|
10
|
+
end
|
11
|
+
|
12
|
+
def call
|
13
|
+
puts 'STARTED THREAD NUMBER: ' + @id.to_s
|
14
|
+
|
15
|
+
r = Random.new
|
16
|
+
|
17
|
+
batch_query = []
|
18
|
+
|
19
|
+
@iterations.times do |time|
|
20
|
+
q = {}
|
21
|
+
q['content'] = @queries[r.rand(0..490)]['content']
|
22
|
+
batch_query.push q
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
response = RestClient2.post(@url, batch_query.to_json)
|
27
|
+
|
28
|
+
json = JSON.parse(response.body)
|
29
|
+
p json
|
30
|
+
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
num_threads = 1
|
37
|
+
host = '10.1.2.230:4567'
|
38
|
+
|
39
|
+
tasks = []
|
40
|
+
executor = ThreadPoolExecutor.new(num_threads, # core_pool_treads
|
41
|
+
num_threads + 10, # max_pool_threads
|
42
|
+
60, # keep_alive_time
|
43
|
+
TimeUnit::SECONDS,
|
44
|
+
LinkedBlockingQueue.new)
|
45
|
+
|
46
|
+
|
47
|
+
num_threads.times do |i|
|
48
|
+
crawler = CheckBatch.new(i) do |selff|
|
49
|
+
end
|
50
|
+
|
51
|
+
task = FutureTask.new(crawler)
|
52
|
+
executor.execute(task)
|
53
|
+
tasks << task
|
54
|
+
end
|
55
|
+
|
56
|
+
tasks.each do |t|
|
57
|
+
t.get
|
58
|
+
end
|
59
|
+
|
60
|
+
executor.shutdown()
|
data/tools/multithread_check.rb
CHANGED
@@ -35,7 +35,7 @@ class CheckLoad
|
|
35
35
|
q['content'] = @queries[r.rand(0..490)]['content']
|
36
36
|
puts "QUERY TO SEND: '" + q['content'] + "'"
|
37
37
|
|
38
|
-
url = "http://
|
38
|
+
url = "http://10.1.2.230:4567/sentra/analyzer/atomic?key=QD3F7Yr2u098RfaB"
|
39
39
|
response = RestClient2.post(url, q.to_json)
|
40
40
|
|
41
41
|
json = JSON.parse(response.body)
|
@@ -46,8 +46,8 @@ class CheckLoad
|
|
46
46
|
end
|
47
47
|
|
48
48
|
|
49
|
-
executor = ThreadPoolExecutor.new(
|
50
|
-
|
49
|
+
executor = ThreadPoolExecutor.new(20, # core_pool_treads
|
50
|
+
30, # max_pool_threads
|
51
51
|
60, # keep_alive_time
|
52
52
|
TimeUnit::SECONDS,
|
53
53
|
LinkedBlockingQueue.new)
|
data/tools/singlethread_check.rb
CHANGED
@@ -9,7 +9,7 @@ q = {}
|
|
9
9
|
q['content'] = 'Hans Morris makes excellent point about P2P risks should be.'
|
10
10
|
puts "QUERY TO SEND: '" + q['content'] + "'"
|
11
11
|
|
12
|
-
url = "http://
|
12
|
+
url = "http://10.1.2.230:4567/sentra/analyzer/atomic?key=QD3F7Yr2u098RfaB"
|
13
13
|
response = RestClient2.post(url, q.to_json)
|
14
14
|
|
15
15
|
json = JSON.parse(response.body)
|
data/tools/tools_helper.rb
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
java_import 'java.util.concurrent.Callable'
|
2
|
+
java_import 'java.util.concurrent.FutureTask'
|
3
|
+
java_import 'java.util.concurrent.LinkedBlockingQueue'
|
4
|
+
java_import 'java.util.concurrent.ThreadPoolExecutor'
|
5
|
+
java_import 'java.util.concurrent.TimeUnit'
|
6
|
+
|
7
|
+
require 'sentra/rest_client'
|
8
|
+
require "sentra/sents"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sentra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kshakirov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05-
|
11
|
+
date: 2015-05-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -138,6 +138,7 @@ files:
|
|
138
138
|
- README_INSTALL
|
139
139
|
- Rakefile
|
140
140
|
- bin/config.ru
|
141
|
+
- bin/config.yml
|
141
142
|
- bin/console
|
142
143
|
- bin/setup
|
143
144
|
- lib/sentra.rb
|
@@ -150,8 +151,10 @@ files:
|
|
150
151
|
- tools/auxillary/tweets.csv
|
151
152
|
- tools/calculate_sents_diff.rb
|
152
153
|
- tools/compare_sentiments.rb
|
154
|
+
- tools/multithread_batch.rb
|
153
155
|
- tools/multithread_check.rb
|
154
156
|
- tools/singlethread_check.rb
|
157
|
+
- tools/tools_helper.rb
|
155
158
|
homepage: https://rubygems.org
|
156
159
|
licenses:
|
157
160
|
- MIT
|