plain_text 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +51 -0
- data/ChangeLog +5 -0
- data/Makefile +23 -0
- data/README.en.rdoc +172 -0
- data/Rakefile +9 -0
- data/bin/countchar +89 -0
- data/lib/plain_text/parse_rule.rb +474 -0
- data/lib/plain_text/part/boundary.rb +44 -0
- data/lib/plain_text/part/paragraph.rb +35 -0
- data/lib/plain_text/part.rb +973 -0
- data/lib/plain_text/split.rb +103 -0
- data/lib/plain_text/util.rb +104 -0
- data/lib/plain_text.rb +839 -0
- data/plain_text.gemspec +49 -0
- data/test/test_plain_text.rb +280 -0
- data/test/test_plain_text_parse_rule.rb +146 -0
- data/test/test_plain_text_part.rb +353 -0
- data/test/test_plain_text_split.rb +78 -0
- metadata +72 -0
@@ -0,0 +1,103 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
module PlainText
|
4
|
+
#
|
5
|
+
# Contains a method that splits a String in a reversible way
|
6
|
+
#
|
7
|
+
# String#split is a powerful method.
|
8
|
+
# One caveat is there is no way to guarantee the possibility to reverse
|
9
|
+
# the process when a *random* Regexp (as opposed to String or when the user
|
10
|
+
# knows exactly what the Regexp is or has full control over it) is given,
|
11
|
+
# because the resultant Array contains *all* the grouped Strings as elements.
|
12
|
+
#
|
13
|
+
# This module provides a method to enable it. Requiring this file
|
14
|
+
# makes the method included in the String class.
|
15
|
+
#
|
16
|
+
# @example Reversible (the method is assumed to be included in String)
|
17
|
+
# my_str.split_with_delimiter(/MyRegexp/).join == my_str # => true
|
18
|
+
#
|
19
|
+
# @author Masa Sakano (Wise Babel Ltd)
|
20
|
+
#
|
21
|
+
module Split

  # The class-method version of the instance method of the same name.
  #
  # One more parameter (input String) is required to specify.
  #
  # The pattern is wrapped in one extra capture group and passed to
  # String#split (with a negative limit), so that every delimiter shows up
  # in the raw result. Elements that come from the capture groups of the
  # given pattern itself are then dropped, which leaves an Array of
  # alternating contents and delimiters.
  #
  # String#split does not emit an element for a group that did not
  # participate in a particular match (e.g., +(Q)?+ or one side of +a|(b)+),
  # so the number of extra elements is determined separately for each
  # delimiter rather than assumed to be constant.
  #
  # @example A group that does not always participate
  #   PlainText::Split.split_with_delimiter("aXQbXc", /X(Q)?/)
  #    #=> ["a", "XQ", "b", "X", "c"]
  #
  # @param instr [String] String that is examined.
  # @param re_in [Regexp, String] If String, it is interpreted literally as in String#split.
  # @return [Array]
  # @see PlainText::Split#split_with_delimiter
  def self.split_with_delimiter(instr, re_in)
    re_in = Regexp.new(Regexp.quote(re_in)) if re_in.respond_to?(:to_str)
    re_grp = add_grouping(re_in)  # Ensure grouping.

    arspl = instr.split(re_grp, -1)
    return arspl if arspl.size <= 1  # [] for an empty String, [instr] if nothing matches.

    # MatchData#size counts $& and every group; 2 means the wrapping group
    # is the only one, hence the raw result is already what is wanted.
    return adjust_last_element(arspl) if re_grp.match(instr).size <= 2

    # Takes only the split main contents and delimiter
    arret = []
    pos = 0  # Character offset in instr of the element being looked at.
    idx = 0  # Index in arspl of the next "content" element.
    while idx < arspl.size
      content = arspl[idx]
      arret << content
      idx += 1
      break if idx >= arspl.size  # The last content has no delimiter after it.

      delim = arspl[idx]
      arret << delim
      pos += content.size

      # Re-match at the delimiter position to learn how many of the inner
      # groups participated, i.e., how many extra elements follow in arspl.
      mat = re_grp.match(instr, pos)
      n_inner = (mat ? mat.captures.drop(1).compact.size : 0)
      idx += 1 + n_inner
      pos += delim.size
    end
    adjust_last_element(arret)  # => Array
  end

  ####################################################
  # Class methods (Private)
  ####################################################

  # This method encloses the given Regexp with '()'
  #
  # The options (flags) of the given Regexp are preserved.
  #
  # @param rule_re [Regexp]
  # @return [Regexp]
  def self.add_grouping(rule_re)
    Regexp.new "(#{rule_re.source})", rule_re.options
  end
  private_class_method :add_grouping

  # Utility: removes (destructively) the last element if it is empty.
  #
  # @param ary [Array<String>] must have at least one element.
  # @return [Array] the same Array object
  def self.adjust_last_element(ary)
    ary.pop if ary[-1].empty?  # ary.size > 0 is guaranteed
    ary
  end
  private_class_method :adjust_last_element

  ####################################################
  # Instance methods
  ####################################################

  # Split with the delimiter even when Regexp (or String) is given
  #
  # Note the last empty component, if exists, is deleted in the returned Array.
  # If the input string is empty, the returned Array is also empty,
  # as in String#split.
  #
  # @example Standard split (without grouping) : +s="XQabXXcXQ"+
  #   s.split(/X+Q?/)     #=> ["", "ab", "c"],
  #   s.split(/X+Q?/, -1) #=> ["", "ab", "c", ""],
  #
  # @example Standard split (with grouping) : +s="XQabXXcXQ"+
  #   s.split(/X+(Q?)/, -1)   #=> ["", "Q", "ab", "", "c", "Q", ""],
  #   s.split(/(X+(Q?))/, -1) #=> ["", "XQ", "Q", "ab", "XX", "", "c", "XQ", "Q", ""],
  #
  # @example This method (when included in String (as Default)) : +s="XQabXXcXQ"+
  #   s.split_with_delimiter(/X+(Q?)/)
  #    #=> ["", "XQ", "ab", "XX", "c", "XQ"]
  #
  # @param re_in [Regexp, String] If String, it is interpreted literally as in String#split.
  # @return [Array]
  def split_with_delimiter(*rest)
    # Delegates to the module-level method of the same name, with self as the input String.
    PlainText::Split.public_send(__method__, self, *rest)
  end
end # module Split
|
97
|
+
end # module PlainText
|
98
|
+
|
99
|
+
# Mixes PlainText::Split into the core String class,
# which enables String#split_with_delimiter
String.send(:include, PlainText::Split)
|
103
|
+
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
module PlainText
|
4
|
+
|
5
|
+
# Contains some utility methods for use in this module and classes.
|
6
|
+
#
|
7
|
+
# @author Masa Sakano (Wise Babel Ltd)
|
8
|
+
#
|
9
|
+
module Util

  # All methods in this Module are module functions.
  module_function

  # Returns a pair of Arrays of even and odd number-indices of the original Array
  #
  # @example
  #   even_odd_arrays([33,44,55], size_even: true)
  #     # => [[33, 55], [44, ""]]
  #
  # @param ary [Array]
  # @param size_even: [Boolean] if true (Def: false), the sizes of the returned arrays are guaranteed to be identical.
  # @param filler: [Object] if size_even: is true and the two sizes differ, this filler is appended to the Array of the odd-index elements.
  # @return [Array<Array>] 2-element Array of [elements_at_even_indices, elements_at_odd_indices]
  def even_odd_arrays(ary, size_even: false, filler: "")
    # n.b., the receiver (ary) must be explicit; a bare "select" is Kernel#select (for IO).
    ar_even = ary.select.with_index { |_, i| i.even? }
    ar_odd  = ary.select.with_index { |_, i| i.odd? }
    if size_even && (ar_even.size != ar_odd.size)
      # The even-index Array can be longer by one only; pad the other.
      ar_odd.push filler
      raise "Should not happern." if ar_even.size != ar_odd.size
    end
    [ar_even, ar_odd]
  end

  # Returns a non-negative Array index for self
  #
  # If positive or zero, it returns i.
  # If the negative index is out of range, it returns nil.
  #
  # @param i [Integer]
  # @param ary [Array] Reference Array.
  # @return [Integer, NilClass] nil if out of range to the negative. Note in most cases in Ruby default, it raises IndexError. See the code of {#positive_array_index_checked}
  # @raise [TypeError] if non-integer is specified.
  # @raise [ArgumentError] if ary is not an Array, or more specifically, it does not have size method or ary.size does not return Integer or similar.
  def positive_array_index(i, ary)
    i2 =
      begin
        i.to_int
      rescue StandardError
        raise TypeError, "no implicit conversion of #{i.class} into Integer"
      end
    return i2 if i2 >= 0  # ary is not consulted for a non-negative index.

    ret =
      begin
        ary.size + i2
      rescue StandardError
        raise ArgumentError, "argument is not an array."
      end
    (ret < 0) ? nil : ret
  end

  # Returns a non-negative Array index for self, performing a check.
  #
  # Exception is raised if it is out of range.
  #
  # Wrapper for {#positive_array_index}
  #
  # @param index_in [Integer] Index to check and convert from. Potentially negative integer.
  # @param ary [Array] Reference Array.
  # @param accept_too_big: [Boolean, NilClass] if true (Default), a positive index larger than the last array index is returned as it is. If nil, the last index + 1 is accepted but raises an Exception for anything larger. If false, any index larger than the last index raises an Exception.
  # @param varname: [NilClass, String] Name of the variable (or nil) to be used for error messages.
  # @return [Integer] Non-negative index; i.e., if index=-1 is specified for an Array with a size of 3, the returned value is 2 (the last index of it).
  # @raise [IndexError] if the index is out of the range to negative, or too big and accept_too_big: does not allow it.
  def positive_array_index_checked(index_in, ary, accept_too_big: true, varname: nil)
    # Tail of the error message; names the variable only when it is given.
    errmsg_tail = (varname ? sprintf(" of %s.", varname) : ".")

    index = positive_array_index(index_in, ary)  # guaranteed to be Integer or nil
    raise IndexError, sprintf("index (%s) too small for array; minimum: -%d", index_in, ary.size) if !index  # Ruby default Error message (except the variable "index" as opposed to "index_in is used in the true Ruby default).

    # A negative index_in that has survived so far is always within the range.
    if index_in.to_int >= 0
      last_index = ary.size - 1
      errnote1 = nil
      if (index > last_index + 1) && !accept_too_big
        # Rejected for both nil and false.
        errnote1 = ' (or +1)'
      elsif (index == last_index + 1) && (false == accept_too_big)
        # Exactly one past the end is rejected only for (explicit) false.
        errnote1 = " "
      end
      raise IndexError, sprintf("Specified index (%s) is larger than the last index (%d)%s%s", index_in, last_index, errnote1, errmsg_tail) if errnote1
    end
    index
  end

  # Raise TypeError
  #
  # Call as +raise_typeerror(var_name)+ from instance methods,
  # providing this Module is included in the Class/Module.
  #
  # @param var [Object] the object whose class is reported in the message
  # @param to_class [String, Class] class name converted into.
  # @option verbose: [Boolean] ($DEBUG) if true, var.inspect is also included in the message.
  # @raise [TypeError]
  def raise_typeerror(var, to_class, verbose: $DEBUG)
    msg1 = (verbose ? sprintf("(<= %s)", var.inspect) : "")
    # Class (or anything with #name) or else String-like
    to_class_str = (to_class.name rescue to_class.to_str)
    raise TypeError, sprintf("no implicit conversion of %s%s into %s", var.class, msg1, to_class_str)
  end

end # module Util
|
101
|
+
|
102
|
+
include Util
|
103
|
+
end # module PlainText
|
104
|
+
|