tweetparser 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/tweetparser/grammar.treetop +55 -15
  2. metadata +39 -27
@@ -7,32 +7,72 @@ grammar TweetContent
7
7
  }
8
8
  end
9
9
 
10
+ # Partial components
11
+
12
+ rule ascii_letter
13
+ [a-zA-Z]
14
+ end
15
+
16
+ rule digit
17
+ [0-9]
18
+ end
19
+
20
+ rule non_ascii
21
+ [^\x20-\x7F]
22
+ end
23
+
24
+ rule hyphen
25
+ "-"
26
+ end
27
+
28
+ rule underscore
29
+ "_"
30
+ end
31
+
32
+ rule at_sign
33
+ "@" / "＠"
34
+ end
35
+
36
+ rule hash_sign
37
+ "#" / "＃"
38
+ end
39
+
10
40
  rule subdomain
11
- ([a-zA-Z0-9\-] / [^\x20-\x7F])+
41
+ (ascii_letter / digit / hyphen / non_ascii)+
12
42
  end
13
43
 
14
44
  rule tld
15
- [a-zA-Z] [a-zA-Z]+
45
+ ascii_letter ascii_letter+
46
+ end
47
+
48
+ rule ascii_name
49
+ (ascii_letter / digit / underscore)+
50
+ end
51
+
52
+ rule ascii_name_with_letters
53
+ ascii_letter (ascii_letter / digit / underscore)* / (digit / underscore)+ ascii_letter (ascii_letter / digit / underscore)*
54
+ end
55
+
56
+ rule protocol
57
+ [hH] [tT] [tT] [pP] [sS]? "://"
16
58
  end
17
59
 
60
+ rule path_component
61
+ [a-zA-Z0-9?=\-_&%();:\.,~+/]
62
+ end
63
+
64
+ # Outputs
65
+
18
66
  rule url
19
- (("http" / "HTTP") [sS]? "://" / "www." / "WWW.") (subdomain ".")+ tld (":" [0-9]+)? ("/" [a-zA-Z0-9\?#=\-_&%();:\.,~+]*)* {
67
+ (protocol / [Ww] [Ww] [Ww] ".") (subdomain ".")+ tld (":" [0-9]+)? ("/" path_component*)? ("#" path_component*)? {
20
68
  def content
21
69
  [:url, text_value]
22
70
  end
23
71
  }
24
72
  end
25
73
 
26
- rule name
27
- [a-zA-Z0-9_]+
28
- end
29
-
30
- rule name_with_letters
31
- [a-zA-Z] [a-zA-Z0-9_]* / [0-9_]+ [a-zA-Z] [a-zA-Z0-9_]*
32
- end
33
-
34
74
  rule username
35
- ("@" / "＠") name !("@" / "＠" / "_") {
75
+ at_sign ascii_name !(at_sign / underscore) {
36
76
  def content
37
77
  [:username, text_value]
38
78
  end
@@ -40,7 +80,7 @@ grammar TweetContent
40
80
  end
41
81
 
42
82
  rule list
43
- username "/" name {
83
+ username "/" ascii_name {
44
84
  def content
45
85
  [:list, text_value]
46
86
  end
@@ -48,7 +88,7 @@ grammar TweetContent
48
88
  end
49
89
 
50
90
  rule hashtag
51
- ("#" / "＃") name_with_letters {
91
+ hash_sign ascii_name_with_letters {
52
92
  def content
53
93
  [:hashtag, text_value]
54
94
  end
@@ -56,7 +96,7 @@ grammar TweetContent
56
96
  end
57
97
 
58
98
  rule slash
59
- "/" name {
99
+ "/" ascii_name {
60
100
  def content
61
101
  [:slash, text_value]
62
102
  end
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tweetparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ hash: 19
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 2
9
+ - 2
10
+ version: 0.2.2
5
11
  platform: ruby
6
12
  authors:
7
13
  - Paul Battley
@@ -9,39 +15,39 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2010-02-24 00:00:00 +00:00
18
+ date: 2010-09-21 00:00:00 +01:00
13
19
  default_executable:
14
20
  dependencies:
15
21
  - !ruby/object:Gem::Dependency
16
22
  name: treetop
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
20
26
  requirements:
21
- - - ~>
27
+ - - ">="
22
28
  - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 1
32
+ - 4
33
+ - 2
23
34
  version: 1.4.2
24
- version:
25
- - !ruby/object:Gem::Dependency
26
- name: polyglot
27
35
  type: :runtime
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ~>
32
- - !ruby/object:Gem::Version
33
- version: 0.2.9
34
- version:
36
+ version_requirements: *id001
35
37
  - !ruby/object:Gem::Dependency
36
38
  name: shoulda
37
- type: :development
38
- version_requirement:
39
- version_requirements: !ruby/object:Gem::Requirement
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
40
42
  requirements:
41
43
  - - ">="
42
44
  - !ruby/object:Gem::Version
45
+ hash: 3
46
+ segments:
47
+ - 0
43
48
  version: "0"
44
- version:
49
+ type: :development
50
+ version_requirements: *id002
45
51
  description:
46
52
  email: pbattley@gmail.com
47
53
  executables: []
@@ -51,12 +57,12 @@ extensions: []
51
57
  extra_rdoc_files: []
52
58
 
53
59
  files:
54
- - test/autolink_conformance_test.rb
55
- - test/extraction_conformance_test.rb
56
- - test/parser_test.rb
57
- - test/twitter-text-conformance/autolink.yml
58
60
  - test/twitter-text-conformance/extract.yml
61
+ - test/twitter-text-conformance/autolink.yml
59
62
  - test/twitter-text-conformance/README
63
+ - test/parser_test.rb
64
+ - test/autolink_conformance_test.rb
65
+ - test/extraction_conformance_test.rb
60
66
  - lib/tweetparser/grammar.treetop
61
67
  - lib/tweetparser.rb
62
68
  has_rdoc: true
@@ -69,21 +75,27 @@ rdoc_options: []
69
75
  require_paths:
70
76
  - lib
71
77
  required_ruby_version: !ruby/object:Gem::Requirement
78
+ none: false
72
79
  requirements:
73
80
  - - ">="
74
81
  - !ruby/object:Gem::Version
82
+ hash: 3
83
+ segments:
84
+ - 0
75
85
  version: "0"
76
- version:
77
86
  required_rubygems_version: !ruby/object:Gem::Requirement
87
+ none: false
78
88
  requirements:
79
89
  - - ">="
80
90
  - !ruby/object:Gem::Version
91
+ hash: 3
92
+ segments:
93
+ - 0
81
94
  version: "0"
82
- version:
83
95
  requirements: []
84
96
 
85
97
  rubyforge_project:
86
- rubygems_version: 1.3.5
98
+ rubygems_version: 1.3.7
87
99
  signing_key:
88
100
  specification_version: 3
89
101
  summary: Extract content from tweets