ark_tweet_nlp 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ark_tweet_nlp/parser.rb +9 -2
- data/lib/ark_tweet_nlp/version.rb +1 -1
- data/spec/parser_spec.rb +3 -27
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e36b418a86b2d2e4fe41564a51bdd55093ad1d17
|
4
|
+
data.tar.gz: 97e2931247bc851734a364b72917e2267ea75648
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c411a204f88b578e1f3f897785dcf0b649fbb87d3a79c00081eb859ac9de2f4dfa5c116a28b87c1ec43e67421acafc3dc26243339023dd0356b851200a2d4ed9
|
7
|
+
data.tar.gz: f7d8b63bb5aa4e49edf2d9eb0e9bca19bec8cb68fa73f4ec96a04776ceeb8e113ff8de5ebbba3c3ecb6037c9bf65c1ffa771429fe1193c8caaee1c3220d16762
|
data/lib/ark_tweet_nlp/parser.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'tempfile'
|
1
2
|
require 'set'
|
2
3
|
|
3
4
|
module ArkTweetNlp
|
@@ -57,9 +58,15 @@ module ArkTweetNlp
|
|
57
58
|
arr.each.inject({}){ |res,hash| Parser.merge(res,hash) }
|
58
59
|
end
|
59
60
|
|
61
|
+
#def Parser.run_tagger text
|
62
|
+
##FIXME: regex destroys urls...
|
63
|
+
#`echo "#{text.gsub(/[^\w\s\d#]/, '')}" | #{TAGGER_PATH}`
|
64
|
+
#end
|
60
65
|
def Parser.run_tagger text
|
61
|
-
|
62
|
-
|
66
|
+
file = Tempfile.new('tweets')
|
67
|
+
file.write(text)
|
68
|
+
file.rewind
|
69
|
+
`#{TAGGER_PATH} #{file.path}`
|
63
70
|
end
|
64
71
|
|
65
72
|
def Parser.convert_line line
|
data/spec/parser_spec.rb
CHANGED
@@ -13,34 +13,10 @@ describe ArkTweetNlp::Parser do
|
|
13
13
|
'awesome' => :A }])
|
14
14
|
end
|
15
15
|
it "suports urls" do
|
16
|
-
expect(ArkTweetNlp::Parser.find_tags("I think I haven't had a segmentation fault in years http://t.co/COjaaFj6Ib")).to eq(
|
17
|
-
"think"=>:V,
|
18
|
-
"havent"=>:V,
|
19
|
-
"had"=>:V,
|
20
|
-
"a"=>:D,
|
21
|
-
"segmentation"=>:N,
|
22
|
-
"fault"=>:N,
|
23
|
-
"in"=>:P,
|
24
|
-
"years"=>:N,
|
25
|
-
"httptcoCOjaaFj6Ib"=>:"$"}])
|
16
|
+
expect(ArkTweetNlp::Parser.find_tags("I think I haven't had a segmentation fault in years http://t.co/COjaaFj6Ib")).to eq( [{"I"=>:O, "think"=>:V, "haven't"=>:V, "had"=>:V, "a"=>:D, "segmentation"=>:N, "fault"=>:N, "in"=>:P, "years"=>:N, "http://t.co/COjaaFj6Ib"=>:U}] )
|
26
17
|
end
|
27
|
-
it "
|
28
|
-
expect(ArkTweetNlp::Parser.find_tags("Delayed... And waiting on a tire from Louisville. \"You can't be serious #Disappointed #pissed #letdown http://t.co/BFqsPZmr8m")).to eq([{"Delayed"=>:A,
|
29
|
-
"And"=>:&,
|
30
|
-
"waiting"=>:V,
|
31
|
-
"on"=>:P,
|
32
|
-
"a"=>:D,
|
33
|
-
"tire"=>:N,
|
34
|
-
"from"=>:P,
|
35
|
-
"Louisville"=>:^,
|
36
|
-
"You"=>:O,
|
37
|
-
"cant"=>:V,
|
38
|
-
"be"=>:V,
|
39
|
-
"serious"=>:A,
|
40
|
-
"#Disappointed"=>:"#",
|
41
|
-
"#pissed"=>:"#",
|
42
|
-
"#letdown"=>:"#",
|
43
|
-
"httptcoBFqsPZmr8m"=>:"#"}])
|
18
|
+
it "supoorts ponctuation from the tweets" do
|
19
|
+
expect(ArkTweetNlp::Parser.find_tags("Delayed... And waiting on a tire from Louisville. \"You can't be serious #Disappointed #pissed #letdown http://t.co/BFqsPZmr8m")).to eq([{"Delayed"=>:V, "..."=>:",", "And"=>:&, "waiting"=>:V, "on"=>:P, "a"=>:D, "tire"=>:N, "from"=>:P, "Louisville"=>:^, "."=>:",", "\""=>:",", "You"=>:O, "can't"=>:V, "be"=>:V, "serious"=>:A, "#Disappointed"=>:"#", "#pissed"=>:"#", "#letdown"=>:"#", "http://t.co/BFqsPZmr8m"=>:U}])
|
44
20
|
end
|
45
21
|
it "tags multiple tweets per line" do
|
46
22
|
expect(ArkTweetNlp::Parser.find_tags("faceboooooooook is awesome\nfaceboooooooook is awesome")).to eq([{'faceboooooooook' => :^,'is' => :V,'awesome' => :A},{'faceboooooooook' => :^,'is' => :V,'awesome' => :A} ])
|