greeb 0.2.2.rc2 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/lib/greeb/core.rb +1 -1
- data/lib/greeb/parser.rb +13 -0
- data/lib/greeb/version.rb +1 -1
- data/spec/parser_spec.rb +13 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 00bc5ae2fdcacd87badf8624a3471ccc36125cdb
|
4
|
+
data.tar.gz: 068b056a33f1903094cf2bd84cfaac938e4f1158
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 495271fba24e980a82c573a6b7c038d1215e83cf02e319760013751a77eac82433cbf8597c41158f5f6f6c427bbb504bdd3fc5e50f61d1502aed184cc164170d
|
7
|
+
data.tar.gz: a7ad8a22be6d98e64271762c57b6bfee8f6f8a78d065ba9e91c6f0afc509ac6303b0c7c7c0eaa1e2a43d8f36525d30835b6c8e7a2bcccaeb458c17ecc8a3f643
|
data/README.md
CHANGED
@@ -160,7 +160,17 @@ pp Greeb::Parser.abbrevs(text).map { |e| [e, text[e.from...e.to]] }
|
|
160
160
|
The algorithm is not so accurate, but still useful in many practical
|
161
161
|
situations.
|
162
162
|
|
163
|
-
|
163
|
+
#### Timestamps retrieval
|
164
|
+
```ruby
|
165
|
+
text = 'Our time is running out: 13:37 or 14:89.'
|
166
|
+
|
167
|
+
pp Greeb::Parser.time(text).map { |e| [e, text[e.from...e.to]] }
|
168
|
+
=begin
|
169
|
+
[[#<struct Greeb::Span from=25, to=30, type=:time>, "13:37"]]
|
170
|
+
=end
|
171
|
+
```
|
172
|
+
|
173
|
+
## Spans
|
164
174
|
Greeb operates with spans, tuples of *(from, to, kind)*, where
|
165
175
|
*from* is the beginning of the span, *to* is the end of the span,
|
166
176
|
and *kind* is a type of the span.
|
data/lib/greeb/core.rb
CHANGED
@@ -7,7 +7,7 @@ module Greeb::Core
|
|
7
7
|
# Greeb::Core uses several helpers from Greeb::Parser to perform
|
8
8
|
# additional analysis using their heuristic methods.
|
9
9
|
#
|
10
|
-
HELPERS = [:urls, :emails, :abbrevs]
|
10
|
+
HELPERS = [:urls, :emails, :abbrevs, :time]
|
11
11
|
|
12
12
|
# Recognize e-mail addresses in the input text.
|
13
13
|
#
|
data/lib/greeb/parser.rb
CHANGED
@@ -19,6 +19,9 @@ module Greeb::Parser
|
|
19
19
|
# This pattern matches anything that looks like HTML. Or not.
|
20
20
|
HTML = /<(.*?)>/i
|
21
21
|
|
22
|
+
# Time pattern.
|
23
|
+
TIME = /\b(\d|[0-2]\d):[0-6]\d(:[0-6]\d){0,1}\b/i
|
24
|
+
|
22
25
|
# Recognize URLs in the input text. Actually, URL is an obsolete standard
|
23
26
|
# and this code should be rewritten to use the URI concept.
|
24
27
|
#
|
@@ -60,6 +63,16 @@ module Greeb::Parser
|
|
60
63
|
scan(text, HTML, :html)
|
61
64
|
end
|
62
65
|
|
66
|
+
# Recognize timestamps in the input text.
|
67
|
+
#
|
68
|
+
# @param text [String] input text.
|
69
|
+
#
|
70
|
+
# @return [Array<Greeb::Span>] found timestamps.
|
71
|
+
#
|
72
|
+
def time(text)
|
73
|
+
scan(text, TIME, :time)
|
74
|
+
end
|
75
|
+
|
63
76
|
private
|
64
77
|
# Implementation of regexp-based {Greeb::Span} scanner.
|
65
78
|
#
|
data/lib/greeb/version.rb
CHANGED
data/spec/parser_spec.rb
CHANGED
@@ -7,7 +7,8 @@ describe Parser do
|
|
7
7
|
('Hello there! My name is <span class="name">Vasya B.</span> and ' \
|
8
8
|
'I am к.ф.-м.н. My website is http://вася.рф/. And my e-mail is ' \
|
9
9
|
'example@example.com! It is available by URL: http://vasya.ru. ' \
|
10
|
-
'Also, <b>G.L.H.F.</b> everyone!'
|
10
|
+
'Also, <b>G.L.H.F.</b> everyone! It\'s 13:37 or 00:02:28 right ' \
|
11
|
+
'now, not 14:89.').freeze
|
11
12
|
end
|
12
13
|
|
13
14
|
describe 'URL' do
|
@@ -55,4 +56,15 @@ describe Parser do
|
|
55
56
|
)
|
56
57
|
end
|
57
58
|
end
|
59
|
+
|
60
|
+
describe 'TIME' do
|
61
|
+
subject { Parser.time(text) }
|
62
|
+
|
63
|
+
it 'recognizes timestamps' do
|
64
|
+
subject.must_equal(
|
65
|
+
[Span.new(225, 230, :time),
|
66
|
+
Span.new(234, 242, :time)]
|
67
|
+
)
|
68
|
+
end
|
69
|
+
end
|
58
70
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: greeb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2.rc2
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Ustalov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-11-
|
11
|
+
date: 2013-11-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: minitest
|
@@ -72,9 +72,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
72
72
|
version: '0'
|
73
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
|
-
- - '
|
75
|
+
- - '>='
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version:
|
77
|
+
version: '0'
|
78
78
|
requirements: []
|
79
79
|
rubyforge_project: greeb
|
80
80
|
rubygems_version: 2.1.9
|