tweetparser 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
@@ -11,8 +11,12 @@ grammar TweetContent
|
|
11
11
|
([a-zA-Z0-9\-] / [^\x20-\x7F])+
|
12
12
|
end
|
13
13
|
|
14
|
+
rule tld
|
15
|
+
[a-zA-Z] [a-zA-Z]+
|
16
|
+
end
|
17
|
+
|
14
18
|
rule url
|
15
|
-
(("http" / "HTTP") [sS]? "://" / "www." / "WWW.") subdomain
|
19
|
+
(("http" / "HTTP") [sS]? "://" / "www." / "WWW.") (subdomain ".")+ tld (":" [0-9]+)? ("/" [a-zA-Z0-9\?#=\-_&%();:\.,~+]*)* {
|
16
20
|
def content
|
17
21
|
[:url, text_value]
|
18
22
|
end
|
@@ -28,7 +32,7 @@ grammar TweetContent
|
|
28
32
|
end
|
29
33
|
|
30
34
|
rule username
|
31
|
-
("@" / "＠") name {
|
35
|
+
("@" / "＠") name !("@" / "＠" / "_") {
|
32
36
|
def content
|
33
37
|
[:username, text_value]
|
34
38
|
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
$KCODE = "u"
|
3
|
+
$:.unshift(File.expand_path("../../lib", __FILE__))
|
4
|
+
require "test/unit"
|
5
|
+
require "shoulda"
|
6
|
+
require "tweetparser"
|
7
|
+
require "yaml"
|
8
|
+
|
9
|
+
class ExtractionConformanceTest < Test::Unit::TestCase
|
10
|
+
DATA_PATH = File.expand_path("../twitter-text-conformance/extract.yml", __FILE__)
|
11
|
+
|
12
|
+
test_data = YAML.load(File.read(DATA_PATH))["tests"]
|
13
|
+
|
14
|
+
context "when extracting mentions" do
|
15
|
+
test_data["mentions"].each do |hash|
|
16
|
+
should hash["description"] do
|
17
|
+
parts = TweetParser.parse(hash["text"])
|
18
|
+
assert_equal hash["expected"], parts.select{ |a| a[0] == :username }.map{ |a| a[1][/^.(.*)/, 1] }
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
context "when extracting urls" do
|
24
|
+
test_data["urls"].each do |hash|
|
25
|
+
should hash["description"] do
|
26
|
+
parts = TweetParser.parse(hash["text"])
|
27
|
+
assert_equal hash["expected"], parts.select{ |a| a[0] == :url }.map{ |a| a[1] }
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
context "when extracting hashtags" do
|
33
|
+
test_data["hashtags"].each do |hash|
|
34
|
+
should hash["description"] do
|
35
|
+
parts = TweetParser.parse(hash["text"])
|
36
|
+
assert_equal hash["expected"], parts.select{ |a| a[0] == :hashtag }.map{ |a| a[1][/^.(.*)/, 1] }
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tweetparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Battley
|
@@ -51,7 +51,8 @@ extensions: []
|
|
51
51
|
extra_rdoc_files: []
|
52
52
|
|
53
53
|
files:
|
54
|
-
- test/
|
54
|
+
- test/autolink_conformance_test.rb
|
55
|
+
- test/extraction_conformance_test.rb
|
55
56
|
- test/parser_test.rb
|
56
57
|
- test/twitter-text-conformance/autolink.yml
|
57
58
|
- test/twitter-text-conformance/extract.yml
|