ark_tweet_nlp 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f1b3260363eb025c2536fa1e6285a19a722fe2b5
4
- data.tar.gz: c3829a1dece24316594a9559cde223dc99ce81d5
3
+ metadata.gz: e36b418a86b2d2e4fe41564a51bdd55093ad1d17
4
+ data.tar.gz: 97e2931247bc851734a364b72917e2267ea75648
5
5
  SHA512:
6
- metadata.gz: 70b20f9826d29940bd7e05a6ab056a1e9d6328c2fbfcffec9181d1e6f18a125e90e8a882d8c93d75c9c96551845181e270ecbb5be72af7fad71ed17241084457
7
- data.tar.gz: ab88602b2c048593b668c2967d0356a61b9501551261ecb25d6c96a1fe673c4f5f61d739b2335306c0d1f6fb3bd377e1648ece8e5356e74cf211272067dd62fb
6
+ metadata.gz: c411a204f88b578e1f3f897785dcf0b649fbb87d3a79c00081eb859ac9de2f4dfa5c116a28b87c1ec43e67421acafc3dc26243339023dd0356b851200a2d4ed9
7
+ data.tar.gz: f7d8b63bb5aa4e49edf2d9eb0e9bca19bec8cb68fa73f4ec96a04776ceeb8e113ff8de5ebbba3c3ecb6037c9bf65c1ffa771429fe1193c8caaee1c3220d16762
@@ -1,3 +1,4 @@
1
+ require 'tempfile'
1
2
  require 'set'
2
3
 
3
4
  module ArkTweetNlp
@@ -57,9 +58,15 @@ module ArkTweetNlp
57
58
  arr.each.inject({}){ |res,hash| Parser.merge(res,hash) }
58
59
  end
59
60
 
61
+ #def Parser.run_tagger text
62
+ ##FIXME: regex destroyes urls...
63
+ #`echo "#{text.gsub(/[^\w\s\d#]/, '')}" | #{TAGGER_PATH}`
64
+ #end
60
65
  def Parser.run_tagger text
61
- #FIXME: regex destroyes urls...
62
- `echo "#{text.gsub(/[^\w\s\d#]/, '')}" | #{TAGGER_PATH}`
66
+ file = Tempfile.new('tweets')
67
+ file.write(text)
68
+ file.rewind
69
+ `#{TAGGER_PATH} #{file.path}`
63
70
  end
64
71
 
65
72
  def Parser.convert_line line
@@ -1,3 +1,3 @@
1
1
  module ArkTweetNlp
2
- VERSION = "0.2.1"
2
+ VERSION = "0.3.0"
3
3
  end
data/spec/parser_spec.rb CHANGED
@@ -13,34 +13,10 @@ describe ArkTweetNlp::Parser do
13
13
  'awesome' => :A }])
14
14
  end
15
15
  it "suports urls" do
16
- expect(ArkTweetNlp::Parser.find_tags("I think I haven't had a segmentation fault in years http://t.co/COjaaFj6Ib")).to eq( [{"I"=>:O,
17
- "think"=>:V,
18
- "havent"=>:V,
19
- "had"=>:V,
20
- "a"=>:D,
21
- "segmentation"=>:N,
22
- "fault"=>:N,
23
- "in"=>:P,
24
- "years"=>:N,
25
- "httptcoCOjaaFj6Ib"=>:"$"}])
16
+ expect(ArkTweetNlp::Parser.find_tags("I think I haven't had a segmentation fault in years http://t.co/COjaaFj6Ib")).to eq( [{"I"=>:O, "think"=>:V, "haven't"=>:V, "had"=>:V, "a"=>:D, "segmentation"=>:N, "fault"=>:N, "in"=>:P, "years"=>:N, "http://t.co/COjaaFj6Ib"=>:U}] )
26
17
  end
27
- it "removes ponctuation from the tweets" do
28
- expect(ArkTweetNlp::Parser.find_tags("Delayed... And waiting on a tire from Louisville. \"You can't be serious #Disappointed #pissed #letdown http://t.co/BFqsPZmr8m")).to eq([{"Delayed"=>:A,
29
- "And"=>:&,
30
- "waiting"=>:V,
31
- "on"=>:P,
32
- "a"=>:D,
33
- "tire"=>:N,
34
- "from"=>:P,
35
- "Louisville"=>:^,
36
- "You"=>:O,
37
- "cant"=>:V,
38
- "be"=>:V,
39
- "serious"=>:A,
40
- "#Disappointed"=>:"#",
41
- "#pissed"=>:"#",
42
- "#letdown"=>:"#",
43
- "httptcoBFqsPZmr8m"=>:"#"}])
18
+ it "supoorts ponctuation from the tweets" do
19
+ expect(ArkTweetNlp::Parser.find_tags("Delayed... And waiting on a tire from Louisville. \"You can't be serious #Disappointed #pissed #letdown http://t.co/BFqsPZmr8m")).to eq([{"Delayed"=>:V, "..."=>:",", "And"=>:&, "waiting"=>:V, "on"=>:P, "a"=>:D, "tire"=>:N, "from"=>:P, "Louisville"=>:^, "."=>:",", "\""=>:",", "You"=>:O, "can't"=>:V, "be"=>:V, "serious"=>:A, "#Disappointed"=>:"#", "#pissed"=>:"#", "#letdown"=>:"#", "http://t.co/BFqsPZmr8m"=>:U}])
44
20
  end
45
21
  it "tags multiple tweets per line" do
46
22
  expect(ArkTweetNlp::Parser.find_tags("faceboooooooook is awesome\nfaceboooooooook is awesome")).to eq([{'faceboooooooook' => :^,'is' => :V,'awesome' => :A},{'faceboooooooook' => :^,'is' => :V,'awesome' => :A} ])
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ark_tweet_nlp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bernardo