textutils 0.10.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 68baf8d24b589a21d4394a6b18760b349e93d13d
4
- data.tar.gz: d8c9a466924b62c4ffa53b88641f7a3e81844ef3
3
+ metadata.gz: cbe5116178fcb2b9b4ca61faea573b738177c59c
4
+ data.tar.gz: 595ec4ed63716e146f47d690dd2533130d16754a
5
5
  SHA512:
6
- metadata.gz: 66214b48ee18a369ccef2b369702aa481350ce58eb8f3174fee473fa642c15238af5cad0a5e0e53250bdd7ffc78dfff06e62e3e738e24fa3178a1f8fbda6a751
7
- data.tar.gz: 4e590abefa048fbc971391bd65681dc230a254caacb7b728daded53bb756192e6e43aec34432e8c7beb319a1ebab4d9b4128299fea264594926ed668b05ff9db
6
+ metadata.gz: cfdf67b1c2b68331908c91c4ffd9679b64758f9da0874e8205d2ce7ce7fab80b8466f4e297ed3add84963410d2cff0e42af270abe702333d54c421a33bef131c
7
+ data.tar.gz: 12b253520f2e07926131a7a03a9bc23b81dc7937552b1d40efae984b84075c4b7c161faf219b133a0640866427e8758ba7eb918de73a82705b7f3443b444c154
data/Manifest.txt CHANGED
@@ -22,6 +22,7 @@ lib/textutils/helper/xml_helper.rb
22
22
  lib/textutils/page.rb
23
23
  lib/textutils/parser/name_parser.rb
24
24
  lib/textutils/patterns.rb
25
+ lib/textutils/reader/block_reader.rb
25
26
  lib/textutils/reader/code_reader.rb
26
27
  lib/textutils/reader/fixture_reader.rb
27
28
  lib/textutils/reader/hash_reader.rb
@@ -33,9 +34,11 @@ lib/textutils/title_mapper.rb
33
34
  lib/textutils/utils.rb
34
35
  lib/textutils/version.rb
35
36
  test/data/cl_all.txt
37
+ test/data/feedburner.txt
36
38
  test/helper.rb
37
39
  test/test_address_helper.rb
38
40
  test/test_asciify.rb
41
+ test/test_block_reader.rb
39
42
  test/test_fixture_reader.rb
40
43
  test/test_hypertext_helper.rb
41
44
  test/test_slugify.rb
data/lib/textutils.rb CHANGED
@@ -54,6 +54,7 @@ require 'textutils/reader/hash_reader'
54
54
  require 'textutils/reader/line_reader'
55
55
  require 'textutils/reader/values_reader'
56
56
  require 'textutils/reader/fixture_reader'
57
+ require 'textutils/reader/block_reader'
57
58
 
58
59
  require 'textutils/classifier'
59
60
  require 'textutils/title' # title table/mapper/finder utils
@@ -0,0 +1,67 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ # fix: move into TextUtils namespace/module!!
5
+
6
# Splits a text into blocks that are separated by lines consisting of
# three or more dashes (e.g. `---`).
#
# Lines whose first non-whitespace character is `#` are treated as comments
# and are never copied into the result. Every other line is stripped of
# leading and trailing whitespace before it is collected.
class BlockReader

  include LogUtils::Logging

  # Builds a reader from the contents of the file at +path+.
  #
  # The file is read with File.read_utf8, which (per the note in
  # textutils/utils.rb) assumes/enforces utf-8 encoding, with or without
  # a BOM (byte order mark).
  def self.from_file( path )
    from_string( File.read_utf8( path ) )
  end

  # Builds a reader for the given +text+ string.
  def self.from_string( text )
    new( text )
  end

  # text - the complete source text that #read will split into blocks
  def initialize( text )
    @text = text
  end

  # Returns an array of block strings; the lines inside a block are joined
  # with "\n", e.g.
  #
  #   [
  #     "line1\nline2",          ## -- block1
  #     "line1\nline2\nline3"    ## -- block2
  #   ]
  #
  # Whatever follows the last separator (possibly nothing) always becomes
  # the final block, so the result holds one more element than there are
  # separator lines.
  def read
    blocks = []
    # Explicitly unfrozen buffer: appending to a bare string literal raises
    # FrozenError under `frozen_string_literal: true` and warns on rubies
    # with chilled string literals.
    buf = ''.dup

    @text.each_line do |line|
      # Comment lines (shell/ruby style, optionally indented) are dropped
      # and NOT copied to the result.
      if line =~ /^\s*#/
        logger.debug 'skipping comment line'
        next
      end

      # Blank lines are deliberately NOT skipped: they stay inside the
      # current block; only the block's outer whitespace is removed below.

      # Remove leading and trailing whitespace (including the newline).
      line = line.strip

      if line =~ /^-{3,}$/   ## three or more dashes
        logger.debug 'block separator'
        blocks << buf.strip  ## strip leading and trailing whitespace of the block
        buf = ''.dup
      else
        buf << line << "\n"
      end
    end

    blocks << buf.strip      ## strip leading and trailing whitespace of the block
    blocks
  end # method read

end # class BlockReader
67
+
@@ -2,8 +2,8 @@
2
2
 
3
3
  module TextUtils
4
4
 
5
- MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 10
5
+ MAJOR = 1 ## todo: namespace inside version or something - why? why not??
6
+ MINOR = 0
7
7
  PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
@@ -0,0 +1,21 @@
1
+ ####################################
2
+ # feedburner text pattern (regex)
3
+ #
4
+ # pattern (regex)
5
+ # ---
6
+ # test1
7
+ # ---
8
+ # test2
9
+ # ---
10
+ # etc.
11
+
12
+
13
+ <img[^>]*?
14
+ src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1
15
+ .*?>
16
+
17
+ ---
18
+
19
+ <img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>
20
+
21
+
@@ -0,0 +1,26 @@
1
+ ###
2
+ # to run use
3
+ # ruby -I ./lib -I ./test test/test_block_reader.rb
4
+ # or better
5
+ # rake test
6
+
7
+ require 'helper'
8
+
9
+
10
# Checks that BlockReader splits the feedburner fixture into its two blocks:
# the regex pattern block and one sample-input block.
#
# Uses Minitest::Test rather than the legacy MiniTest alias, which newer
# minitest releases only define when MT_COMPAT is set.
class TestBlockReader < Minitest::Test

  def test_feedburner
    blocks = BlockReader.from_file( "#{TextUtils.root}/test/data/feedburner.txt" ).read

    ## note: regex - use %q - do NOT escape \. or \1 etc.
    ## NOTE(review): `(:?http:)` looks like a typo for the non-capturing
    ##   `(?:http:)` - confirm; it must be changed together with the fixture.
    pattern = %q{<img[^>]*?src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1.*?>}

    test1 = %q{<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>}

    assert_equal 2, blocks.size
    ## the pattern spans several lines in the fixture, so newlines and
    ## spaces are removed before comparing
    assert_equal pattern, blocks[0].gsub( /[\n ]/, '' )
    assert_equal test1, blocks[1]
  end

end # class TestBlockReader
26
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-04 00:00:00.000000000 Z
11
+ date: 2015-01-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: props
@@ -128,6 +128,7 @@ files:
128
128
  - lib/textutils/page.rb
129
129
  - lib/textutils/parser/name_parser.rb
130
130
  - lib/textutils/patterns.rb
131
+ - lib/textutils/reader/block_reader.rb
131
132
  - lib/textutils/reader/code_reader.rb
132
133
  - lib/textutils/reader/fixture_reader.rb
133
134
  - lib/textutils/reader/hash_reader.rb
@@ -139,9 +140,11 @@ files:
139
140
  - lib/textutils/utils.rb
140
141
  - lib/textutils/version.rb
141
142
  - test/data/cl_all.txt
143
+ - test/data/feedburner.txt
142
144
  - test/helper.rb
143
145
  - test/test_address_helper.rb
144
146
  - test/test_asciify.rb
147
+ - test/test_block_reader.rb
145
148
  - test/test_fixture_reader.rb
146
149
  - test/test_hypertext_helper.rb
147
150
  - test/test_slugify.rb
@@ -189,3 +192,4 @@ test_files:
189
192
  - test/test_title_helper.rb
190
193
  - test/test_slugify.rb
191
194
  - test/test_address_helper.rb
195
+ - test/test_block_reader.rb