plain_text 0.3 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +5 -0
- data/README.en.rdoc +3 -3
- data/lib/plain_text/parse_rule.rb +1 -1
- data/lib/plain_text/part.rb +27 -27
- data/lib/plain_text.rb +3 -3
- data/plain_text.gemspec +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0bbafc2df85dc7fab4a71b03126805e7f2e9916ae0e3fee91e31d9584e5a6ee
|
4
|
+
data.tar.gz: b385be9df6ce8d8c1081a8c5a233daaa60659ccc25c44722c4a333d8cb81a8c0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c3fb2676c18ad0f3e637fc4bac35af7b7fba6d664c852265cd59485d0909dd4e6739c8a4dc12257016472ef31ac8719ed85be061abfe9955a4adf5fcb8b16d29
|
7
|
+
data.tar.gz: dfa034f4130c02aa7ba12817da0395b78f5d6dfcfd349a80966a20e8aa98652ec4c2d2b8c4762b65d25dbfbe39244c2375663085b2189077a04a732300ca6dc7
|
data/ChangeLog
CHANGED
data/README.en.rdoc
CHANGED
@@ -29,7 +29,7 @@ which is the heart to describe the logical structure of documents.
|
|
29
29
|
It is basically a container class and indeed a sub-class of Array. It
|
30
30
|
can contain either of another {PlainText::Part} or more basic
|
31
31
|
components of either of {PlainText::Part::Paragraph} and
|
32
|
-
{PlainText::Part::Boundary}, both of which are sub-classes of String.
|
32
|
+
{PlainText::Part::Boundary}, both of which are sub-classes of String.
|
33
33
|
|
34
34
|
An example instance looks like this:
|
35
35
|
|
@@ -63,7 +63,7 @@ where the name of subclasses (or constants) here arbitrary, except for
|
|
63
63
|
{PlainText::Part::Paragraph::Empty} and
|
64
64
|
{PlainText::Part::Boundary::Empty}, which are pre-defined. Users can
|
65
65
|
define their own subclasses to help organize the logical structure at
|
66
|
-
their will.
|
66
|
+
their will.
|
67
67
|
|
68
68
|
Basically, at every layer, every {PlainText::Part} or
|
69
69
|
{PlainText::Part::Paragraph} is sandwiched by
|
@@ -75,7 +75,7 @@ String instance any time.
|
|
75
75
|
=== PlainText::ParseRule - Class to describe the rule of how to parse
|
76
76
|
|
77
77
|
{PlainText::ParseRule} is the class to describe how to parse initially
|
78
|
-
String, and subsequently {PlainText::Part}, which is basically an Array.
|
78
|
+
String, and subsequently {PlainText::Part}, which is basically an Array.
|
79
79
|
{PlainText::ParseRule} is a container class and holds a set of ordered
|
80
80
|
rules, each of which is either a Proc or, as a simpler rule, a Regexp.
|
81
81
|
A rule, Proc, is defined by a user and is designed to receive either
|
@@ -280,7 +280,7 @@ module PlainText
|
|
280
280
|
# Set (or reset) a human-readable name for {#rules} at a specified index
|
281
281
|
#
|
282
282
|
# @param name [NilClass, #to_s] nil to reset or a human-readable name, usually either String or Symbol
|
283
|
-
# @param
|
283
|
+
# @param index_rules [Integer] Index for {#rules}. A negative index is allowed.
|
284
284
|
# @return [Integer] Non-negative index where name is set; i.e., if index=-1 is specified for {#rules} with a size of 3, the returned value is 2 (the last index of it).
|
285
285
|
def set_name_at(name, index_rules)
|
286
286
|
index = PlainText::Util.positive_array_index_checked(index_rules, @rules, accept_too_big: false, varname: 'rules')
|
data/lib/plain_text/part.rb
CHANGED
@@ -160,10 +160,10 @@ module PlainText
|
|
160
160
|
# The indices provided in this method are for the main Array,
|
161
161
|
# and hence different from {#boundaries}.each_with_index
|
162
162
|
#
|
163
|
-
# @param (see #
|
163
|
+
# @param (see #map_boundary_with_index)
|
164
164
|
# @return as self
|
165
|
-
def
|
166
|
-
|
165
|
+
def each_boundary_with_index(**kwd, &bl)
|
166
|
+
map_boundary_core(map: false, with_index: true, **kwd, &bl)
|
167
167
|
end
|
168
168
|
|
169
169
|
# each method for parts only, providing also the index (always an even number) to the block.
|
@@ -176,10 +176,10 @@ module PlainText
|
|
176
176
|
# The indices provided in this method are for the main Array,
|
177
177
|
# and hence different from {#parts}.each_with_index
|
178
178
|
#
|
179
|
-
# @param (see #
|
179
|
+
# @param (see #map_part_with_index)
|
180
180
|
# @return as self
|
181
|
-
def
|
182
|
-
|
181
|
+
def each_part_with_index(**kwd, &bl)
|
182
|
+
map_part_core(map: false, with_index: true, **kwd, &bl)  # with_index: true so the block also receives each part's index
|
183
183
|
end
|
184
184
|
|
185
185
|
# The first significant (=non-empty) element.
|
@@ -248,16 +248,16 @@ module PlainText
|
|
248
248
|
# @option recursive: [Boolean] if true (Default), map is performed recursively.
|
249
249
|
# @return as self
|
250
250
|
# @see #initialize for the other options (:compact and :compacter)
|
251
|
-
def
|
252
|
-
|
251
|
+
def map_boundary(**kwd, &bl)
|
252
|
+
map_boundary_core(with_index: false, **kwd, &bl)
|
253
253
|
end
|
254
254
|
|
255
255
|
# map method for boundaries only, providing also the index (always an odd number) to the block, returning a copied self.
|
256
256
|
#
|
257
|
-
# @param (see #
|
257
|
+
# @param (see #map_boundary)
|
258
258
|
# @return as self
|
259
|
-
def
|
260
|
-
|
259
|
+
def map_boundary_with_index(**kwd, &bl)
|
260
|
+
map_boundary_core(with_index: true, **kwd, &bl)
|
261
261
|
end
|
262
262
|
|
263
263
|
# map method for parts only, returning a copied self.
|
@@ -271,16 +271,16 @@ module PlainText
|
|
271
271
|
# @option recursive: [Boolean] if true (Default), map is performed recursively.
|
272
272
|
# @return as self
|
273
273
|
# @see #initialize for the other options (:compact and :compacter)
|
274
|
-
def
|
275
|
-
|
274
|
+
def map_part(**kwd, &bl)
|
275
|
+
map_part_core(with_index: false, **kwd, &bl)
|
276
276
|
end
|
277
277
|
|
278
278
|
# map method for parts only, providing also the index (always an even number) to the block, returning a copied self.
|
279
279
|
#
|
280
|
-
# @param (see #
|
280
|
+
# @param (see #map_part)
|
281
281
|
# @return as self
|
282
|
-
def
|
283
|
-
|
282
|
+
def map_part_with_index(**kwd, &bl)
|
283
|
+
map_part_core(with_index: true, **kwd, &bl)  # with_index: true so the block also receives each part's index
|
284
284
|
end
|
285
285
|
|
286
286
|
# Normalize the content, making sure it has an even number of elements
|
@@ -387,7 +387,7 @@ module PlainText
|
|
387
387
|
def squash_boundary_at!(index)
|
388
388
|
(i_pos = get_valid_ipos_for_boundary(index)) || return
|
389
389
|
prt = self[i_pos-1]
|
390
|
-
m = :
|
390
|
+
m = :emptify_last_boundary!
|
391
391
|
self[i_pos] << prt.public_send(m) if prt.class.method_defined? m
|
392
392
|
self[i_pos]
|
393
393
|
end
|
@@ -397,7 +397,7 @@ module PlainText
|
|
397
397
|
#
|
398
398
|
# @return [self]
|
399
399
|
def squash_boundaryies!
|
400
|
-
|
400
|
+
each_boundary_with_index do |ec, i|
|
401
401
|
squash_boundary_at!(i)
|
402
402
|
end
|
403
403
|
self
|
@@ -628,7 +628,7 @@ module PlainText
|
|
628
628
|
ret = super
|
629
629
|
|
630
630
|
# The result may not be in an even number anymore. Correct it.
|
631
|
-
|
631
|
+
Boundary.insert_original_b4_part(size, "") if size.odd?
|
632
632
|
|
633
633
|
# Original method may fill some part of the array with String or even nil.
|
634
634
|
normalize!
|
@@ -772,7 +772,7 @@ module PlainText
|
|
772
772
|
# Emptifies all the Boundaries immediately before the index and squashes it to the one at it.
|
773
773
|
#
|
774
774
|
# @return [Boundary] all the descendants' last Boundaries merged.
|
775
|
-
def
|
775
|
+
def emptify_last_boundary!
|
776
776
|
return Boundary::Empty.dup if size == 0
|
777
777
|
ret = ""
|
778
778
|
ret << prt.public_send(__method__) if prt.class.method_defined? __method__
|
@@ -780,7 +780,7 @@ module PlainText
|
|
780
780
|
self[-1] = Boundary::Empty.dup
|
781
781
|
ret
|
782
782
|
end
|
783
|
-
private :
|
783
|
+
private :emptify_last_boundary!
|
784
784
|
|
785
785
|
|
786
786
|
# Returns a positive Integer index guaranteed to be 1 or greater and smaller than the size.
|
@@ -795,13 +795,13 @@ module PlainText
|
|
795
795
|
private :get_valid_ipos_for_boundary
|
796
796
|
|
797
797
|
|
798
|
-
# Core routine for {#
|
798
|
+
# Core routine for {#map_boundary} and similar.
|
799
799
|
#
|
800
800
|
# @option map: [Boolean] if true (Default), map is performed. Else just each.
|
801
801
|
# @option with_index: [Boolean] if true (Default: false), the index is also yielded to the block
|
802
802
|
# @option recursive: [Boolean] if true (Default), map is performed recursively.
|
803
803
|
# @return as self if map: is true, else void
|
804
|
-
def
|
804
|
+
def map_boundary_core(map: true, with_index: false, recursive: true, **kwd, &bl)
|
805
805
|
ind = -1
|
806
806
|
arnew = map{ |ec|
|
807
807
|
ind += 1
|
@@ -815,16 +815,16 @@ module PlainText
|
|
815
815
|
}
|
816
816
|
self.class.new arnew, recursive: recursive, **kwd if map
|
817
817
|
end
|
818
|
-
private :
|
818
|
+
private :map_boundary_core
|
819
819
|
|
820
|
-
# Core routine for {#
|
820
|
+
# Core routine for {#map_part}
|
821
821
|
#
|
822
822
|
# @option map: [Boolean] if true (Default), map is performed. Else just each.
|
823
823
|
# @option with_index: [Boolean] if true (Default: false), the index is also yielded to the block
|
824
824
|
# @option recursive: [Boolean] if true (Default), map is performed recursively.
|
825
825
|
# @return as self
|
826
826
|
# @see #initialize for the other options (:compact and :compacter)
|
827
|
-
def
|
827
|
+
def map_part_core(map: true, with_index: false, recursive: true, **kwd, &bl)
|
828
828
|
ind = -1
|
829
829
|
new_parts = parts.map{ |ec|
|
830
830
|
ind += 1
|
@@ -836,7 +836,7 @@ module PlainText
|
|
836
836
|
}
|
837
837
|
self.class.new new_parts, boundaries, recursive: recursive, **kwd if map
|
838
838
|
end
|
839
|
-
private :
|
839
|
+
private :map_part_core
|
840
840
|
|
841
841
|
# Core routine for {#normalize!} and {#normalize}
|
842
842
|
#
|
data/lib/plain_text.rb
CHANGED
@@ -303,7 +303,7 @@ module PlainText
|
|
303
303
|
# Boundary
|
304
304
|
case boundary_style
|
305
305
|
when String
|
306
|
-
prt.
|
306
|
+
prt.each_boundary_with_index{|ec, i| ((i == prt.size - 1) && ec.empty?) ? ec : ec.replace(boundary_style)}
|
307
307
|
when :truncate, :t
|
308
308
|
prt.boundaries.each{|ec| ec.gsub!(/[[:blank:]]+/m, ""); ec.gsub!(/\n+{3,}/m, "\n\n")}
|
309
309
|
when :truncate2, :t2
|
@@ -502,7 +502,7 @@ module PlainText
|
|
502
502
|
prt.parts.each do |e_pa|
|
503
503
|
# Each line treated as a Paragraph, and [[:space:]]+ between them as a Boundary.
|
504
504
|
# Then, to work on anything within a line except for line-head/tail is easy.
|
505
|
-
prt_para = Part.parse(e_pa, rule: ParseRule::RuleEachLineStrip).
|
505
|
+
prt_para = Part.parse(e_pa, rule: ParseRule::RuleEachLineStrip).map_part { |e_li|
|
506
506
|
case sps_style
|
507
507
|
when :truncate, :t
|
508
508
|
e_li.gsub(/[[:blank:]]{2,}/m, " ")
|
@@ -513,7 +513,7 @@ module PlainText
|
|
513
513
|
else
|
514
514
|
raise ArgumentError
|
515
515
|
end
|
516
|
-
} #
|
516
|
+
} # map_part
|
517
517
|
e_pa.replace prt_para.join
|
518
518
|
end
|
519
519
|
end
|
data/plain_text.gemspec
CHANGED
@@ -5,7 +5,7 @@ require 'date'
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{plain_text}.sub(/.*/){|c| (c == File.basename(Dir.pwd)) ? c : raise("ERROR: s.name=(#{c}) in gemspec seems wrong!")}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.4"
|
9
9
|
# s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
10
10
|
%w(countchar textclean head.rb tail.rb).each do |f|
|
11
11
|
path = s.bindir+'/'+f
|
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
|
|
13
13
|
end
|
14
14
|
s.bindir = 'bin'
|
15
15
|
s.authors = ["Masa Sakano"]
|
16
|
-
s.date = %q{2019-10-
|
16
|
+
s.date = %q{2019-10-29}.sub(/.*/){|c| (Date.parse(c) == Date.today) ? c : raise("ERROR: s.date=(#{c}) is not today!")}
|
17
17
|
s.summary = %q{Module to handle Plain-Text}
|
18
18
|
s.description = %q{This module provides utility functions and methods to handle plain text, classes Part/Paragraph/Boundary to represent the logical structure of a document and ParseRule to describe the rules to parse plain text to produce a Part-type Ruby instance.}
|
19
19
|
# s.email = %q{abc@example.com}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: plain_text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.4'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Masa Sakano
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-29 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This module provides utility functions and methods to handle plain text,
|
14
14
|
classes Part/Paragraph/Boundary to represent the logical structure of a document
|