tweetparser 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -11,8 +11,12 @@ grammar TweetContent
11
11
  ([a-zA-Z0-9\-] / [^\x20-\x7F])+
12
12
  end
13
13
 
14
+ rule tld
15
+ [a-zA-Z] [a-zA-Z]+
16
+ end
17
+
14
18
  rule url
15
- (("http" / "HTTP") [sS]? "://" / "www." / "WWW.") subdomain ("." subdomain)+ ("/" [\.a-zA-Z0-9\?#=\-_&%]*)* {
19
+ (("http" / "HTTP") [sS]? "://" / "www." / "WWW.") (subdomain ".")+ tld (":" [0-9]+)? ("/" [a-zA-Z0-9\?#=\-_&%();:\.,~+]*)* {
16
20
  def content
17
21
  [:url, text_value]
18
22
  end
@@ -28,7 +32,7 @@ grammar TweetContent
28
32
  end
29
33
 
30
34
  rule username
31
- ("@" / "＠") name {
35
+ ("@" / "＠") name !("@" / "＠" / "_") {
32
36
  def content
33
37
  [:username, text_value]
34
38
  end
@@ -4,7 +4,6 @@ require "test/unit"
4
4
  require "shoulda"
5
5
  require "tweetparser"
6
6
  require "yaml"
7
- require "cgi"
8
7
 
9
8
  class AutolinkConformanceTest < Test::Unit::TestCase
10
9
  DATA_PATH = File.expand_path("../twitter-text-conformance/autolink.yml", __FILE__)
@@ -0,0 +1,40 @@
1
+ # encoding: UTF-8
2
+ $KCODE = "u"
3
+ $:.unshift(File.expand_path("../../lib", __FILE__))
4
+ require "test/unit"
5
+ require "shoulda"
6
+ require "tweetparser"
7
+ require "yaml"
8
+
9
+ class ExtractionConformanceTest < Test::Unit::TestCase
10
+ DATA_PATH = File.expand_path("../twitter-text-conformance/extract.yml", __FILE__)
11
+
12
+ test_data = YAML.load(File.read(DATA_PATH))["tests"]
13
+
14
+ context "when extracting mentions" do
15
+ test_data["mentions"].each do |hash|
16
+ should hash["description"] do
17
+ parts = TweetParser.parse(hash["text"])
18
+ assert_equal hash["expected"], parts.select{ |a| a[0] == :username }.map{ |a| a[1][/^.(.*)/, 1] }
19
+ end
20
+ end
21
+ end
22
+
23
+ context "when extracting urls" do
24
+ test_data["urls"].each do |hash|
25
+ should hash["description"] do
26
+ parts = TweetParser.parse(hash["text"])
27
+ assert_equal hash["expected"], parts.select{ |a| a[0] == :url }.map{ |a| a[1] }
28
+ end
29
+ end
30
+ end
31
+
32
+ context "when extracting hashtags" do
33
+ test_data["hashtags"].each do |hash|
34
+ should hash["description"] do
35
+ parts = TweetParser.parse(hash["text"])
36
+ assert_equal hash["expected"], parts.select{ |a| a[0] == :hashtag }.map{ |a| a[1][/^.(.*)/, 1] }
37
+ end
38
+ end
39
+ end
40
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tweetparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Battley
@@ -51,7 +51,8 @@ extensions: []
51
51
  extra_rdoc_files: []
52
52
 
53
53
  files:
54
- - test/conformance_test.rb
54
+ - test/autolink_conformance_test.rb
55
+ - test/extraction_conformance_test.rb
55
56
  - test/parser_test.rb
56
57
  - test/twitter-text-conformance/autolink.yml
57
58
  - test/twitter-text-conformance/extract.yml