textutils 0.10.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +3 -0
- data/lib/textutils.rb +1 -0
- data/lib/textutils/reader/block_reader.rb +67 -0
- data/lib/textutils/version.rb +2 -2
- data/test/data/feedburner.txt +21 -0
- data/test/test_block_reader.rb +26 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cbe5116178fcb2b9b4ca61faea573b738177c59c
|
4
|
+
data.tar.gz: 595ec4ed63716e146f47d690dd2533130d16754a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cfdf67b1c2b68331908c91c4ffd9679b64758f9da0874e8205d2ce7ce7fab80b8466f4e297ed3add84963410d2cff0e42af270abe702333d54c421a33bef131c
|
7
|
+
data.tar.gz: 12b253520f2e07926131a7a03a9bc23b81dc7937552b1d40efae984b84075c4b7c161faf219b133a0640866427e8758ba7eb918de73a82705b7f3443b444c154
|
data/Manifest.txt
CHANGED
@@ -22,6 +22,7 @@ lib/textutils/helper/xml_helper.rb
|
|
22
22
|
lib/textutils/page.rb
|
23
23
|
lib/textutils/parser/name_parser.rb
|
24
24
|
lib/textutils/patterns.rb
|
25
|
+
lib/textutils/reader/block_reader.rb
|
25
26
|
lib/textutils/reader/code_reader.rb
|
26
27
|
lib/textutils/reader/fixture_reader.rb
|
27
28
|
lib/textutils/reader/hash_reader.rb
|
@@ -33,9 +34,11 @@ lib/textutils/title_mapper.rb
|
|
33
34
|
lib/textutils/utils.rb
|
34
35
|
lib/textutils/version.rb
|
35
36
|
test/data/cl_all.txt
|
37
|
+
test/data/feedburner.txt
|
36
38
|
test/helper.rb
|
37
39
|
test/test_address_helper.rb
|
38
40
|
test/test_asciify.rb
|
41
|
+
test/test_block_reader.rb
|
39
42
|
test/test_fixture_reader.rb
|
40
43
|
test/test_hypertext_helper.rb
|
41
44
|
test/test_slugify.rb
|
data/lib/textutils.rb
CHANGED
@@ -54,6 +54,7 @@ require 'textutils/reader/hash_reader'
|
|
54
54
|
require 'textutils/reader/line_reader'
|
55
55
|
require 'textutils/reader/values_reader'
|
56
56
|
require 'textutils/reader/fixture_reader'
|
57
|
+
require 'textutils/reader/block_reader'
|
57
58
|
|
58
59
|
require 'textutils/classifier'
|
59
60
|
require 'textutils/title' # title table/mapper/finder utils
|
data/lib/textutils/reader/block_reader.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
# fix: move into TextUtils namespace/module!!
|
5
|
+
|
6
|
+
# Splits a text into "blocks": runs of lines separated by a line that
# consists solely of three or more dashes (e.g. "---").
#
# Lines whose first non-whitespace character is '#' are treated as comments
# and never reach the result. Every remaining line is stripped of leading
# and trailing whitespace before it is collected.
#
# TODO: move into the TextUtils namespace/module.
class BlockReader
  include LogUtils::Logging

  class << self
    # Builds a reader from the file at +path+.
    #
    # Reading is delegated to File.read_utf8, a helper defined outside this
    # file (the original note points at textutils/utils.rb); it is expected
    # to assume/enforce UTF-8, with or without a byte order mark.
    def from_file(path)
      from_string(File.read_utf8(path))
    end

    # Builds a reader directly from an in-memory string.
    def from_string(text)
      new(text)
    end
  end

  # text - the complete input to be split by #read.
  def initialize(text)
    @text = text
  end

  # Returns an array of block strings, one entry per block, e.g.
  #
  #   [
  #     "line1\nline2",         # block 1
  #     "line1\nline2\nline3"   # block 2
  #   ]
  #
  # The text after the last separator (or the whole text when there is no
  # separator) always contributes a final entry, even when it is empty.
  def read
    chunks  = []
    current = ""

    @text.each_line do |raw|
      if raw =~ /^\s*#/
        # Shell/ruby style comment line: drop it so comments stay out of
        # the result.
        logger.debug 'skipping comment line'
      else
        # Blank lines are deliberately NOT skipped; they end up as empty
        # lines inside the current block.
        stripped = raw.strip

        if stripped =~ /^-{3,}$/
          # A line of three or more dashes closes the current block.
          logger.debug 'block separator'
          chunks.push(current.strip)   # trim outer whitespace of the block
          current = ""
        else
          current << "#{stripped}\n"
        end
      end
    end

    # Flush whatever followed the last separator and hand back the list.
    chunks.push(current.strip)
  end
end # class BlockReader
|
67
|
+
|
data/lib/textutils/version.rb
CHANGED
data/test/data/feedburner.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
####################################
|
2
|
+
# feedburner text pattern (regex)
|
3
|
+
#
|
4
|
+
# pattern (regex)
|
5
|
+
# ---
|
6
|
+
# test1
|
7
|
+
# ---
|
8
|
+
# test2
|
9
|
+
# ---
|
10
|
+
# etc.
|
11
|
+
|
12
|
+
|
13
|
+
<img[^>]*?
|
14
|
+
src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1
|
15
|
+
.*?>
|
16
|
+
|
17
|
+
---
|
18
|
+
|
19
|
+
<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>
|
20
|
+
|
21
|
+
|
data/test/test_block_reader.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
###
|
2
|
+
# to run use
|
3
|
+
# ruby -I ./lib -I ./test test/test_block_reader.rb
|
4
|
+
# or better
|
5
|
+
# rake test
|
6
|
+
|
7
|
+
require 'helper'
|
8
|
+
|
9
|
+
|
10
|
+
# Checks that BlockReader splits the feedburner fixture into exactly two
# blocks: the (multi-line) regex pattern and one sample <img> tag.
#
# Inherits from Minitest::Test rather than the legacy MiniTest alias, which
# newer Minitest releases only define when MT_COMPAT is set.
class TestBlockReader < Minitest::Test

  def test_feedburner
    blocks = BlockReader.from_file( "#{TextUtils.root}/test/data/feedburner.txt" ).read

    ## note: regex - use %q - do NOT escape \. or \1 etc.
    ## NOTE(review): "(:?http:)" looks like a typo for "(?:http:)"; it is kept
    ##   as is because it has to equal the fixture text verbatim.
    pattern = %q{<img[^>]*?src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1.*?>}

    test1 = %q{<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>}

    assert_equal 2, blocks.size
    ## the pattern spans several lines in the fixture; drop newlines and
    ## spaces before comparing with the one-line form above
    assert_equal pattern, blocks[0].gsub( /[\n ]/, '' )
    assert_equal test1, blocks[1]
  end

end # class TestBlockReader
|
26
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-01-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: props
|
@@ -128,6 +128,7 @@ files:
|
|
128
128
|
- lib/textutils/page.rb
|
129
129
|
- lib/textutils/parser/name_parser.rb
|
130
130
|
- lib/textutils/patterns.rb
|
131
|
+
- lib/textutils/reader/block_reader.rb
|
131
132
|
- lib/textutils/reader/code_reader.rb
|
132
133
|
- lib/textutils/reader/fixture_reader.rb
|
133
134
|
- lib/textutils/reader/hash_reader.rb
|
@@ -139,9 +140,11 @@ files:
|
|
139
140
|
- lib/textutils/utils.rb
|
140
141
|
- lib/textutils/version.rb
|
141
142
|
- test/data/cl_all.txt
|
143
|
+
- test/data/feedburner.txt
|
142
144
|
- test/helper.rb
|
143
145
|
- test/test_address_helper.rb
|
144
146
|
- test/test_asciify.rb
|
147
|
+
- test/test_block_reader.rb
|
145
148
|
- test/test_fixture_reader.rb
|
146
149
|
- test/test_hypertext_helper.rb
|
147
150
|
- test/test_slugify.rb
|
@@ -189,3 +192,4 @@ test_files:
|
|
189
192
|
- test/test_title_helper.rb
|
190
193
|
- test/test_slugify.rb
|
191
194
|
- test/test_address_helper.rb
|
195
|
+
- test/test_block_reader.rb
|