tweetparser 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/tweetparser/grammar.treetop +55 -15
- metadata +39 -27
@@ -7,32 +7,72 @@ grammar TweetContent
|
|
7
7
|
}
|
8
8
|
end
|
9
9
|
|
10
|
+
# Partial components
|
11
|
+
|
12
|
+
rule ascii_letter
|
13
|
+
[a-zA-Z]
|
14
|
+
end
|
15
|
+
|
16
|
+
rule digit
|
17
|
+
[0-9]
|
18
|
+
end
|
19
|
+
|
20
|
+
rule non_ascii
|
21
|
+
[^\x20-\x7F]
|
22
|
+
end
|
23
|
+
|
24
|
+
rule hyphen
|
25
|
+
"-"
|
26
|
+
end
|
27
|
+
|
28
|
+
rule underscore
|
29
|
+
"_"
|
30
|
+
end
|
31
|
+
|
32
|
+
rule at_sign
|
33
|
+
"@" / "＠"
|
34
|
+
end
|
35
|
+
|
36
|
+
rule hash_sign
|
37
|
+
"#" / "＃"
|
38
|
+
end
|
39
|
+
|
10
40
|
rule subdomain
|
11
|
-
(
|
41
|
+
(ascii_letter / digit / hyphen / non_ascii)+
|
12
42
|
end
|
13
43
|
|
14
44
|
rule tld
|
15
|
-
|
45
|
+
ascii_letter ascii_letter+
|
46
|
+
end
|
47
|
+
|
48
|
+
rule ascii_name
|
49
|
+
(ascii_letter / digit / underscore)+
|
50
|
+
end
|
51
|
+
|
52
|
+
rule ascii_name_with_letters
|
53
|
+
ascii_letter (ascii_letter / digit / underscore)* / (digit / underscore)+ ascii_letter (ascii_letter / digit / underscore)*
|
54
|
+
end
|
55
|
+
|
56
|
+
rule protocol
|
57
|
+
[hH] [tT] [tT] [pP] [sS]? "://"
|
16
58
|
end
|
17
59
|
|
60
|
+
rule path_component
|
61
|
+
[a-zA-Z0-9?=\-_&%();:\.,~+/]
|
62
|
+
end
|
63
|
+
|
64
|
+
# Outputs
|
65
|
+
|
18
66
|
rule url
|
19
|
-
(
|
67
|
+
(protocol / [Ww] [Ww] [Ww] ".") (subdomain ".")+ tld (":" [0-9]+)? ("/" path_component*)? ("#" path_component*)? {
|
20
68
|
def content
|
21
69
|
[:url, text_value]
|
22
70
|
end
|
23
71
|
}
|
24
72
|
end
|
25
73
|
|
26
|
-
rule name
|
27
|
-
[a-zA-Z0-9_]+
|
28
|
-
end
|
29
|
-
|
30
|
-
rule name_with_letters
|
31
|
-
[a-zA-Z] [a-zA-Z0-9_]* / [0-9_]+ [a-zA-Z] [a-zA-Z0-9_]*
|
32
|
-
end
|
33
|
-
|
34
74
|
rule username
|
35
|
-
|
75
|
+
at_sign ascii_name !(at_sign / underscore) {
|
36
76
|
def content
|
37
77
|
[:username, text_value]
|
38
78
|
end
|
@@ -40,7 +80,7 @@ grammar TweetContent
|
|
40
80
|
end
|
41
81
|
|
42
82
|
rule list
|
43
|
-
username "/"
|
83
|
+
username "/" ascii_name {
|
44
84
|
def content
|
45
85
|
[:list, text_value]
|
46
86
|
end
|
@@ -48,7 +88,7 @@ grammar TweetContent
|
|
48
88
|
end
|
49
89
|
|
50
90
|
rule hashtag
|
51
|
-
|
91
|
+
hash_sign ascii_name_with_letters {
|
52
92
|
def content
|
53
93
|
[:hashtag, text_value]
|
54
94
|
end
|
@@ -56,7 +96,7 @@ grammar TweetContent
|
|
56
96
|
end
|
57
97
|
|
58
98
|
rule slash
|
59
|
-
"/"
|
99
|
+
"/" ascii_name {
|
60
100
|
def content
|
61
101
|
[:slash, text_value]
|
62
102
|
end
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tweetparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 19
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
- 2
|
10
|
+
version: 0.2.2
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- Paul Battley
|
@@ -9,39 +15,39 @@ autorequire:
|
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
17
|
|
12
|
-
date: 2010-
|
18
|
+
date: 2010-09-21 00:00:00 +01:00
|
13
19
|
default_executable:
|
14
20
|
dependencies:
|
15
21
|
- !ruby/object:Gem::Dependency
|
16
22
|
name: treetop
|
17
|
-
|
18
|
-
|
19
|
-
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
20
26
|
requirements:
|
21
|
-
- -
|
27
|
+
- - ">="
|
22
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 4
|
33
|
+
- 2
|
23
34
|
version: 1.4.2
|
24
|
-
version:
|
25
|
-
- !ruby/object:Gem::Dependency
|
26
|
-
name: polyglot
|
27
35
|
type: :runtime
|
28
|
-
|
29
|
-
version_requirements: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ~>
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 0.2.9
|
34
|
-
version:
|
36
|
+
version_requirements: *id001
|
35
37
|
- !ruby/object:Gem::Dependency
|
36
38
|
name: shoulda
|
37
|
-
|
38
|
-
|
39
|
-
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
40
42
|
requirements:
|
41
43
|
- - ">="
|
42
44
|
- !ruby/object:Gem::Version
|
45
|
+
hash: 3
|
46
|
+
segments:
|
47
|
+
- 0
|
43
48
|
version: "0"
|
44
|
-
|
49
|
+
type: :development
|
50
|
+
version_requirements: *id002
|
45
51
|
description:
|
46
52
|
email: pbattley@gmail.com
|
47
53
|
executables: []
|
@@ -51,12 +57,12 @@ extensions: []
|
|
51
57
|
extra_rdoc_files: []
|
52
58
|
|
53
59
|
files:
|
54
|
-
- test/autolink_conformance_test.rb
|
55
|
-
- test/extraction_conformance_test.rb
|
56
|
-
- test/parser_test.rb
|
57
|
-
- test/twitter-text-conformance/autolink.yml
|
58
60
|
- test/twitter-text-conformance/extract.yml
|
61
|
+
- test/twitter-text-conformance/autolink.yml
|
59
62
|
- test/twitter-text-conformance/README
|
63
|
+
- test/parser_test.rb
|
64
|
+
- test/autolink_conformance_test.rb
|
65
|
+
- test/extraction_conformance_test.rb
|
60
66
|
- lib/tweetparser/grammar.treetop
|
61
67
|
- lib/tweetparser.rb
|
62
68
|
has_rdoc: true
|
@@ -69,21 +75,27 @@ rdoc_options: []
|
|
69
75
|
require_paths:
|
70
76
|
- lib
|
71
77
|
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
72
79
|
requirements:
|
73
80
|
- - ">="
|
74
81
|
- !ruby/object:Gem::Version
|
82
|
+
hash: 3
|
83
|
+
segments:
|
84
|
+
- 0
|
75
85
|
version: "0"
|
76
|
-
version:
|
77
86
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
78
88
|
requirements:
|
79
89
|
- - ">="
|
80
90
|
- !ruby/object:Gem::Version
|
91
|
+
hash: 3
|
92
|
+
segments:
|
93
|
+
- 0
|
81
94
|
version: "0"
|
82
|
-
version:
|
83
95
|
requirements: []
|
84
96
|
|
85
97
|
rubyforge_project:
|
86
|
-
rubygems_version: 1.3.
|
98
|
+
rubygems_version: 1.3.7
|
87
99
|
signing_key:
|
88
100
|
specification_version: 3
|
89
101
|
summary: Extract content from tweets
|