tweetparser 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/tweetparser/grammar.treetop +55 -15
  2. metadata +39 -27
@@ -7,32 +7,72 @@ grammar TweetContent
7
7
  }
8
8
  end
9
9
 
10
+ # Partial components
11
+
12
+ rule ascii_letter
13
+ [a-zA-Z]
14
+ end
15
+
16
+ rule digit
17
+ [0-9]
18
+ end
19
+
20
+ rule non_ascii
21
+ [^\x20-\x7F]
22
+ end
23
+
24
+ rule hyphen
25
+ "-"
26
+ end
27
+
28
+ rule underscore
29
+ "_"
30
+ end
31
+
32
+ rule at_sign
33
+ "@" / "＠"
34
+ end
35
+
36
+ rule hash_sign
37
+ "#" / "＃"
38
+ end
39
+
10
40
  rule subdomain
11
- ([a-zA-Z0-9\-] / [^\x20-\x7F])+
41
+ (ascii_letter / digit / hyphen / non_ascii)+
12
42
  end
13
43
 
14
44
  rule tld
15
- [a-zA-Z] [a-zA-Z]+
45
+ ascii_letter ascii_letter+
46
+ end
47
+
48
+ rule ascii_name
49
+ (ascii_letter / digit / underscore)+
50
+ end
51
+
52
+ rule ascii_name_with_letters
53
+ ascii_letter (ascii_letter / digit / underscore)* / (digit / underscore)+ ascii_letter (ascii_letter / digit / underscore)*
54
+ end
55
+
56
+ rule protocol
57
+ [hH] [tT] [tT] [pP] [sS]? "://"
16
58
  end
17
59
 
60
+ rule path_component
61
+ [a-zA-Z0-9?=\-_&%();:\.,~+/]
62
+ end
63
+
64
+ # Outputs
65
+
18
66
  rule url
19
- (("http" / "HTTP") [sS]? "://" / "www." / "WWW.") (subdomain ".")+ tld (":" [0-9]+)? ("/" [a-zA-Z0-9\?#=\-_&%();:\.,~+]*)* {
67
+ (protocol / [Ww] [Ww] [Ww] ".") (subdomain ".")+ tld (":" [0-9]+)? ("/" path_component*)? ("#" path_component*)? {
20
68
  def content
21
69
  [:url, text_value]
22
70
  end
23
71
  }
24
72
  end
25
73
 
26
- rule name
27
- [a-zA-Z0-9_]+
28
- end
29
-
30
- rule name_with_letters
31
- [a-zA-Z] [a-zA-Z0-9_]* / [0-9_]+ [a-zA-Z] [a-zA-Z0-9_]*
32
- end
33
-
34
74
  rule username
35
- ("@" / "＠") name !("@" / "＠" / "_") {
75
+ at_sign ascii_name !(at_sign / underscore) {
36
76
  def content
37
77
  [:username, text_value]
38
78
  end
@@ -40,7 +80,7 @@ grammar TweetContent
40
80
  end
41
81
 
42
82
  rule list
43
- username "/" name {
83
+ username "/" ascii_name {
44
84
  def content
45
85
  [:list, text_value]
46
86
  end
@@ -48,7 +88,7 @@ grammar TweetContent
48
88
  end
49
89
 
50
90
  rule hashtag
51
- ("#" / "＃") name_with_letters {
91
+ hash_sign ascii_name_with_letters {
52
92
  def content
53
93
  [:hashtag, text_value]
54
94
  end
@@ -56,7 +96,7 @@ grammar TweetContent
56
96
  end
57
97
 
58
98
  rule slash
59
- "/" name {
99
+ "/" ascii_name {
60
100
  def content
61
101
  [:slash, text_value]
62
102
  end
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tweetparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ hash: 19
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 2
9
+ - 2
10
+ version: 0.2.2
5
11
  platform: ruby
6
12
  authors:
7
13
  - Paul Battley
@@ -9,39 +15,39 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2010-02-24 00:00:00 +00:00
18
+ date: 2010-09-21 00:00:00 +01:00
13
19
  default_executable:
14
20
  dependencies:
15
21
  - !ruby/object:Gem::Dependency
16
22
  name: treetop
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
20
26
  requirements:
21
- - - ~>
27
+ - - ">="
22
28
  - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 1
32
+ - 4
33
+ - 2
23
34
  version: 1.4.2
24
- version:
25
- - !ruby/object:Gem::Dependency
26
- name: polyglot
27
35
  type: :runtime
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ~>
32
- - !ruby/object:Gem::Version
33
- version: 0.2.9
34
- version:
36
+ version_requirements: *id001
35
37
  - !ruby/object:Gem::Dependency
36
38
  name: shoulda
37
- type: :development
38
- version_requirement:
39
- version_requirements: !ruby/object:Gem::Requirement
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
40
42
  requirements:
41
43
  - - ">="
42
44
  - !ruby/object:Gem::Version
45
+ hash: 3
46
+ segments:
47
+ - 0
43
48
  version: "0"
44
- version:
49
+ type: :development
50
+ version_requirements: *id002
45
51
  description:
46
52
  email: pbattley@gmail.com
47
53
  executables: []
@@ -51,12 +57,12 @@ extensions: []
51
57
  extra_rdoc_files: []
52
58
 
53
59
  files:
54
- - test/autolink_conformance_test.rb
55
- - test/extraction_conformance_test.rb
56
- - test/parser_test.rb
57
- - test/twitter-text-conformance/autolink.yml
58
60
  - test/twitter-text-conformance/extract.yml
61
+ - test/twitter-text-conformance/autolink.yml
59
62
  - test/twitter-text-conformance/README
63
+ - test/parser_test.rb
64
+ - test/autolink_conformance_test.rb
65
+ - test/extraction_conformance_test.rb
60
66
  - lib/tweetparser/grammar.treetop
61
67
  - lib/tweetparser.rb
62
68
  has_rdoc: true
@@ -69,21 +75,27 @@ rdoc_options: []
69
75
  require_paths:
70
76
  - lib
71
77
  required_ruby_version: !ruby/object:Gem::Requirement
78
+ none: false
72
79
  requirements:
73
80
  - - ">="
74
81
  - !ruby/object:Gem::Version
82
+ hash: 3
83
+ segments:
84
+ - 0
75
85
  version: "0"
76
- version:
77
86
  required_rubygems_version: !ruby/object:Gem::Requirement
87
+ none: false
78
88
  requirements:
79
89
  - - ">="
80
90
  - !ruby/object:Gem::Version
91
+ hash: 3
92
+ segments:
93
+ - 0
81
94
  version: "0"
82
- version:
83
95
  requirements: []
84
96
 
85
97
  rubyforge_project:
86
- rubygems_version: 1.3.5
98
+ rubygems_version: 1.3.7
87
99
  signing_key:
88
100
  specification_version: 3
89
101
  summary: Extract content from tweets