greeb 0.2.2.rc2 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/lib/greeb/core.rb +1 -1
- data/lib/greeb/parser.rb +13 -0
- data/lib/greeb/version.rb +1 -1
- data/spec/parser_spec.rb +13 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 00bc5ae2fdcacd87badf8624a3471ccc36125cdb
|
4
|
+
data.tar.gz: 068b056a33f1903094cf2bd84cfaac938e4f1158
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 495271fba24e980a82c573a6b7c038d1215e83cf02e319760013751a77eac82433cbf8597c41158f5f6f6c427bbb504bdd3fc5e50f61d1502aed184cc164170d
|
7
|
+
data.tar.gz: a7ad8a22be6d98e64271762c57b6bfee8f6f8a78d065ba9e91c6f0afc509ac6303b0c7c7c0eaa1e2a43d8f36525d30835b6c8e7a2bcccaeb458c17ecc8a3f643
|
data/README.md
CHANGED
@@ -160,7 +160,17 @@ pp Greeb::Parser.abbrevs(text).map { |e| [e, text[e.from...e.to]] }
|
|
160
160
|
The algorithm is not very accurate, but it is still useful in many practical
|
161
161
|
situations.
|
162
162
|
|
163
|
-
|
163
|
+
#### Timestamps retrieval
|
164
|
+
```ruby
|
165
|
+
text = 'Our time is running out: 13:37 or 14:89.'
|
166
|
+
|
167
|
+
pp Greeb::Parser.time(text).map { |e| [e, text[e.from...e.to]] }
|
168
|
+
=begin
|
169
|
+
[[#<struct Greeb::Span from=25, to=30, type=:time>, "13:37"]]
|
170
|
+
=end
|
171
|
+
```
|
172
|
+
|
173
|
+
## Spans
|
164
174
|
Greeb operates with spans, tuples of *(from, to, kind)*, where
|
165
175
|
*from* is the beginning of the span, *to* is the end of the span,
|
166
176
|
and *kind* is a type of the span.
|
data/lib/greeb/core.rb
CHANGED
@@ -7,7 +7,7 @@ module Greeb::Core
|
|
7
7
|
# Greeb::Core uses several helpers from Greeb::Parser to perform
|
8
8
|
# additional analysis using their heuristic methods.
|
9
9
|
#
|
10
|
-
HELPERS = [:urls, :emails, :abbrevs]
|
10
|
+
HELPERS = [:urls, :emails, :abbrevs, :time]
|
11
11
|
|
12
12
|
# Recognize e-mail addresses in the input text.
|
13
13
|
#
|
data/lib/greeb/parser.rb
CHANGED
@@ -19,6 +19,9 @@ module Greeb::Parser
|
|
19
19
|
# This pattern matches anything that looks like HTML. Or not.
|
20
20
|
HTML = /<(.*?)>/i
|
21
21
|
|
22
|
+
# Time pattern.
|
23
|
+
# Matches H:MM, HH:MM and HH:MM:SS timestamps. Hours are limited to
# 0-23 and minutes/seconds to 00-59, so strings such as 14:65 or 29:10
# are not recognized as time. Capture groups are kept as they were.
TIME = /\b(\d|[01]\d|2[0-3]):[0-5]\d(:[0-5]\d){0,1}\b/i
|
24
|
+
|
22
25
|
# Recognize URLs in the input text. Actually, URL is an obsolete standard
|
23
26
|
# and this code should be rewritten to use the URI concept.
|
24
27
|
#
|
@@ -60,6 +63,16 @@ module Greeb::Parser
|
|
60
63
|
scan(text, HTML, :html)
|
61
64
|
end
|
62
65
|
|
66
|
+
# Recognize timestamps in the input text.
|
67
|
+
#
|
68
|
+
# @param text [String] input text.
|
69
|
+
#
|
70
|
+
# @return [Array<Greeb::Span>] found timestamps.
|
71
|
+
#
|
72
|
+
def time(text)
|
73
|
+
scan(text, TIME, :time)
|
74
|
+
end
|
75
|
+
|
63
76
|
private
|
64
77
|
# Implementation of regexp-based {Greeb::Span} scanner.
|
65
78
|
#
|
data/lib/greeb/version.rb
CHANGED
data/spec/parser_spec.rb
CHANGED
@@ -7,7 +7,8 @@ describe Parser do
|
|
7
7
|
('Hello there! My name is <span class="name">Vasya B.</span> and ' \
|
8
8
|
'I am к.ф.-м.н. My website is http://вася.рф/. And my e-mail is ' \
|
9
9
|
'example@example.com! It is available by URL: http://vasya.ru. ' \
|
10
|
-
'Also, <b>G.L.H.F.</b> everyone!'
|
10
|
+
'Also, <b>G.L.H.F.</b> everyone! It\'s 13:37 or 00:02:28 right ' \
|
11
|
+
'now, not 14:89.').freeze
|
11
12
|
end
|
12
13
|
|
13
14
|
describe 'URL' do
|
@@ -55,4 +56,15 @@ describe Parser do
|
|
55
56
|
)
|
56
57
|
end
|
57
58
|
end
|
59
|
+
|
60
|
+
describe 'TIME' do
|
61
|
+
subject { Parser.time(text) }
|
62
|
+
|
63
|
+
it 'recognizes timestamps' do
|
64
|
+
subject.must_equal(
|
65
|
+
[Span.new(225, 230, :time),
|
66
|
+
Span.new(234, 242, :time)]
|
67
|
+
)
|
68
|
+
end
|
69
|
+
end
|
58
70
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: greeb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2.rc2
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Ustalov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-11-
|
11
|
+
date: 2013-11-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: minitest
|
@@ -72,9 +72,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
72
72
|
version: '0'
|
73
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
|
-
- - '
|
75
|
+
- - '>='
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version:
|
77
|
+
version: '0'
|
78
78
|
requirements: []
|
79
79
|
rubyforge_project: greeb
|
80
80
|
rubygems_version: 2.1.9
|