tweetparser 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,8 +11,12 @@ grammar TweetContent
11
11
  ([a-zA-Z0-9\-] / [^\x20-\x7F])+
12
12
  end
13
13
 
14
+ rule tld
15
+ [a-zA-Z] [a-zA-Z]+
16
+ end
17
+
14
18
  rule url
15
- (("http" / "HTTP") [sS]? "://" / "www." / "WWW.") subdomain ("." subdomain)+ ("/" [\.a-zA-Z0-9\?#=\-_&%]*)* {
19
+ (("http" / "HTTP") [sS]? "://" / "www." / "WWW.") (subdomain ".")+ tld (":" [0-9]+)? ("/" [a-zA-Z0-9\?#=\-_&%();:\.,~+]*)* {
16
20
  def content
17
21
  [:url, text_value]
18
22
  end
@@ -28,7 +32,7 @@ grammar TweetContent
28
32
  end
29
33
 
30
34
  rule username
31
- ("@" / "＠") name {
35
+ ("@" / "＠") name !("@" / "＠" / "_") {
32
36
  def content
33
37
  [:username, text_value]
34
38
  end
@@ -4,7 +4,6 @@ require "test/unit"
4
4
  require "shoulda"
5
5
  require "tweetparser"
6
6
  require "yaml"
7
- require "cgi"
8
7
 
9
8
  class AutolinkConformanceTest < Test::Unit::TestCase
10
9
  DATA_PATH = File.expand_path("../twitter-text-conformance/autolink.yml", __FILE__)
@@ -0,0 +1,40 @@
1
+ # encoding: UTF-8
2
+ $KCODE = "u"
3
+ $:.unshift(File.expand_path("../../lib", __FILE__))
4
+ require "test/unit"
5
+ require "shoulda"
6
+ require "tweetparser"
7
+ require "yaml"
8
+
9
+ class ExtractionConformanceTest < Test::Unit::TestCase
10
+ DATA_PATH = File.expand_path("../twitter-text-conformance/extract.yml", __FILE__)
11
+
12
+ test_data = YAML.load(File.read(DATA_PATH))["tests"]
13
+
14
+ context "when extracting mentions" do
15
+ test_data["mentions"].each do |hash|
16
+ should hash["description"] do
17
+ parts = TweetParser.parse(hash["text"])
18
+ assert_equal hash["expected"], parts.select{ |a| a[0] == :username }.map{ |a| a[1][/^.(.*)/, 1] }
19
+ end
20
+ end
21
+ end
22
+
23
+ context "when extracting urls" do
24
+ test_data["urls"].each do |hash|
25
+ should hash["description"] do
26
+ parts = TweetParser.parse(hash["text"])
27
+ assert_equal hash["expected"], parts.select{ |a| a[0] == :url }.map{ |a| a[1] }
28
+ end
29
+ end
30
+ end
31
+
32
+ context "when extracting hashtags" do
33
+ test_data["hashtags"].each do |hash|
34
+ should hash["description"] do
35
+ parts = TweetParser.parse(hash["text"])
36
+ assert_equal hash["expected"], parts.select{ |a| a[0] == :hashtag }.map{ |a| a[1][/^.(.*)/, 1] }
37
+ end
38
+ end
39
+ end
40
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tweetparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Battley
@@ -51,7 +51,8 @@ extensions: []
51
51
  extra_rdoc_files: []
52
52
 
53
53
  files:
54
- - test/conformance_test.rb
54
+ - test/autolink_conformance_test.rb
55
+ - test/extraction_conformance_test.rb
55
56
  - test/parser_test.rb
56
57
  - test/twitter-text-conformance/autolink.yml
57
58
  - test/twitter-text-conformance/extract.yml