mightystring 0.1.4 → 0.1.5.b
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +10 -10
- data/{README → README.md} +30 -20
- data/bin/ms-striphtml +36 -36
- data/lib/mightystring.rb +57 -30
- data/lib/mightystring/string_at.rb +26 -16
- data/lib/mightystring/string_del.rb +51 -51
- data/lib/mightystring/string_each.rb +24 -0
- data/lib/mightystring/string_fetch.rb +13 -0
- data/lib/mightystring/string_find.rb +19 -0
- data/lib/mightystring/string_first.rb +13 -0
- data/lib/mightystring/string_index_all.rb +27 -27
- data/lib/mightystring/string_join.rb +13 -0
- data/lib/mightystring/string_last.rb +13 -0
- data/lib/mightystring/string_map.rb +19 -0
- data/lib/mightystring/string_match_pci.rb +16 -0
- data/lib/mightystring/string_method_missing.rb +25 -0
- data/lib/mightystring/string_pop.rb +15 -0
- data/lib/mightystring/string_push.rb +13 -0
- data/lib/mightystring/string_shift.rb +15 -0
- data/lib/mightystring/string_sort.rb +17 -0
- data/lib/mightystring/{string_stripbyac.rb → string_strip_byac.rb} +28 -28
- data/lib/mightystring/string_unshift.rb +13 -0
- data/lib/mightystring/strip_html.rb +143 -149
- data/lib/mightystring/version.rb +3 -3
- data/test/test_ms.rb +53 -50
- metadata +32 -21
- data/lib/mightystring/string_matchpci.rb +0 -16
@@ -1,27 +1,27 @@
|
|
1
|
-
# Part of MightyString
|
2
|
-
# by Daniel P. Clark
|
3
|
-
# webmaster@6ftdan.com
|
4
|
-
|
5
|
-
# Index_All
|
6
|
-
module Index_All
|
7
|
-
module String
|
8
|
-
# find_all(search string): Returns indexes of search string as an index array.
|
9
|
-
def index_all(in_srch = "")
|
10
|
-
in_srch = in_srch.to_s
|
11
|
-
if not in_srch.empty?
|
12
|
-
arr_indexes = []
|
13
|
-
srch_index = self.rindex(in_srch)
|
14
|
-
while not srch_index.nil? do
|
15
|
-
tmpStr = self[0..srch_index-1]
|
16
|
-
arr_indexes += [srch_index] # Put it in the list
|
17
|
-
if srch_index == 0
|
18
|
-
srch_index = nil
|
19
|
-
else
|
20
|
-
srch_index = tmpStr.rindex(in_srch)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
return arr_indexes.reverse
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
1
|
+
# Part of MightyString
|
2
|
+
# by Daniel P. Clark
|
3
|
+
# webmaster@6ftdan.com
|
4
|
+
|
5
|
+
# Index_All
|
6
|
+
module Index_All
  module String
    # index_all(search string): returns the indexes of every occurrence of
    # the search string, in ascending order.
    #
    # Occurrences are located starting from the right-hand end of the string
    # and never overlap, e.g. "aaa".index_all("aa") => [1].
    #
    # in_srch - value to look for; it is converted with to_s first.
    #
    # Returns an Array of Integer indexes ([] when nothing matches), or nil
    # when the search string is empty.
    def index_all(in_srch = "")
      in_srch = in_srch.to_s
      return nil if in_srch.empty?

      found = []
      pos = rindex(in_srch)
      while pos
        found << pos
        # The next occurrence has to end at or before the start of this one,
        # so it can start no later than pos - in_srch.length.
        limit = pos - in_srch.length
        pos = limit < 0 ? nil : rindex(in_srch, limit)
      end
      found.reverse
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Part of MightyString
|
2
|
+
# by Daniel P. Clark
|
3
|
+
# webmaster@6ftdan.com
|
4
|
+
require 'forwardable'
|
5
|
+
|
6
|
+
module Map
  module String
    extend Forwardable

    # Hand the Forwardable macros to whichever class includes this module.
    def self.included(base)
      base.extend(Forwardable)
    end

    # Map for String: map, map! and flat_map are forwarded to the Array
    # returned by #chars, so each of them yields single characters and
    # returns whatever the Array method returns.
    def_delegators :chars, :map, :map!, :flat_map
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# Part of MightyString
|
2
|
+
# by Daniel P. Clark
|
3
|
+
# webmaster@6ftdan.com
|
4
|
+
|
5
|
+
# Match Partial Case-Insensitive
|
6
|
+
module Match_PCI
  module String
    # Match Partial Case-Insensitive.
    # Usage: "My string has this?".match_pci('RinG') => true
    #
    # in_srch - text to look for; it is converted with to_s, so nil, symbols
    #           and numbers are accepted as well.
    #
    # Returns true when the down-cased receiver contains the down-cased
    # search text, and false otherwise (including for an empty search).
    def match_pci(in_srch = "")
      needle = in_srch.to_s
      return false if needle.empty?

      downcase.include?(needle.downcase)
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Part of MightyString
|
2
|
+
# by Daniel P. Clark
|
3
|
+
# webmaster@6ftdan.com
|
4
|
+
|
5
|
+
module MethodMissing
  module String
    # Forwards any message the receiver does not understand to the Array
    # returned by #chars, provided that Array responds to it. Everything
    # else goes to the default handler, which raises NoMethodError.
    def method_missing(meth, *args, &block)
      letters = chars
      if letters.respond_to?(meth)
        letters.send(meth, *args, &block)
      else
        super
      end
    end

    # Reports the forwarded methods through respond_to? and method().
    # Implemented as respond_to_missing? rather than as an override of
    # respond_to? so the standard two-argument form
    # respond_to?(name, include_all) keeps working.
    #
    # NOTE(review): Array conversion hooks such as to_ary are forwarded as
    # well, because Array responds to them - confirm that is intended.
    def respond_to_missing?(meth, include_private = false)
      chars.respond_to?(meth) || super
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Part of MightyString
|
2
|
+
# by Daniel P. Clark
|
3
|
+
# webmaster@6ftdan.com
|
4
|
+
|
5
|
+
|
6
|
+
module Sort
  module String
    # Returns a new String holding the receiver's characters in sorted order.
    def sort
      split("").sort.join
    end

    # Sorts the receiver's characters in place and returns the receiver.
    def sort!
      replace(sort)
    end
  end
end
|
@@ -1,28 +1,28 @@
|
|
1
|
-
# Part of MightyString
|
2
|
-
# by Daniel P. Clark
|
3
|
-
# webmaster@6ftdan.com
|
4
|
-
|
5
|
-
# Example acceptable charachters = (Range.new('a','z').to_a + Range.new('A','Z').to_a + Range.new('0','9').to_a + ['.','-','_',"'",'"',',']).flatten
|
6
|
-
|
7
|
-
# Strip by Acceptable Characters
|
8
|
-
module
|
9
|
-
module String
|
10
|
-
# Strip by Acceptable Characters : String.stripbyac(charlist) => Copy of New String (removes any character not in list)
|
11
|
-
def strip_byac(acceptchars)
|
12
|
-
if not acceptchars.nil?
|
13
|
-
if acceptchars.is_a?(String)
|
14
|
-
return self.split('').map!{|x| if acceptchars.split('').include?(x); x end }.join
|
15
|
-
elsif acceptchars.is_a?(Array)
|
16
|
-
return self.split('').map!{|x| if acceptchars.include?(x); x end }.join
|
17
|
-
elsif acceptchars.respond_to?(:[])
|
18
|
-
acceptchars = acceptchars.to_a
|
19
|
-
return self.split('').map!{|x| if acceptchars.include?(x); x end }.join
|
20
|
-
else
|
21
|
-
raise "#{puts acceptchars.class}"
|
22
|
-
end
|
23
|
-
else
|
24
|
-
raise StandardError.new('You must include a list of acceptable characters for this string in stripbyac(acceptchars)!')
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
1
|
+
# Part of MightyString
|
2
|
+
# by Daniel P. Clark
|
3
|
+
# webmaster@6ftdan.com
|
4
|
+
|
5
|
+
# Example acceptable characters = (Range.new('a','z').to_a + Range.new('A','Z').to_a + Range.new('0','9').to_a + ['.','-','_',"'",'"',',']).flatten
|
6
|
+
|
7
|
+
# Strip by Acceptable Characters
|
8
|
+
module Strip_byAC
  module String
    # Strip by Acceptable Characters: returns a copy of the string that keeps
    # only the characters found in the given list.
    #   "a1b2".strip_byac("ab") => "ab"
    #
    # acceptchars - a String (each of its characters is acceptable), an Array
    #               of one-character Strings, or any other object responding
    #               to to_a or [] (e.g. a Range), which is converted with to_a.
    #
    # Raises StandardError when acceptchars is nil, and RuntimeError naming
    # the offending class when the list type is not supported.
    def strip_byac(acceptchars)
      if acceptchars.nil?
        raise StandardError, 'You must include a list of acceptable characters for this string in stripbyac(acceptchars)!'
      end

      # Build the lookup list once instead of once per character.
      allowed =
        if acceptchars.is_a?(::String) # ::String - a bare String here would name this mixin module
          acceptchars.split('')
        elsif acceptchars.respond_to?(:to_a) || acceptchars.respond_to?(:[])
          acceptchars.to_a
        else
          raise "Unsupported list of acceptable characters: #{acceptchars.class}"
        end

      split('').select { |char| allowed.include?(char) }.join
    end
  end
end
|
@@ -1,149 +1,143 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
#
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# -
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
# -
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
# -
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
# - Added
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
def self.
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
sh_end =
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
puts
|
145
|
-
puts license
|
146
|
-
puts
|
147
|
-
end
|
148
|
-
end # module Strip_HTML
|
149
|
-
end # MightyString
|
1
|
+
# APP_VERSION = '0.1 11-27-2012'
|
2
|
+
|
3
|
+
# Mighty String - Strip HTML
|
4
|
+
# Ruby should be easy to read, regex is not. I believe string block handling can be done better than rough regex'ing.
|
5
|
+
#
|
6
|
+
# TODO *FIXME* A rare href exception gets by, as well as some comment cases, only if math_by_space is enabled. 0.1 11-27-2012 "Release Version"
|
7
|
+
#
|
8
|
+
# Ver 0.1 11-27-2012 "Release Version"
|
9
|
+
# - Modularized and Gemified
|
10
|
+
# - Fixed mathematical exceptions. Code is completely functional with test case.
|
11
|
+
#
|
12
|
+
# Ver Pre_0.3.0 8-7-2012
|
13
|
+
# - Added String modules for a healthy and useful String library
|
14
|
+
# - Finished mathematical and generic & exceptions
|
15
|
+
#
|
16
|
+
# Ver Pre_0.2.1
|
17
|
+
# - Fixed indexing problem when only 1 of two cases was 'paired', such as '<' and not '>'. It died previously on nil.
|
18
|
+
# - Added test case
|
19
|
+
#
|
20
|
+
# Ver Pre_0.2
|
21
|
+
# - Added command line html file processing
|
22
|
+
# - Added case insensitivity for HTML snippets
|
23
|
+
#
|
24
|
+
# Ver Pre_0.1
|
25
|
+
# - HTML tag stripper with ASCII output
|
26
|
+
|
27
|
+
Strip_HTML_License = "
|
28
|
+
|
29
|
+
MightyString is licensed under 'The MIT License (MIT)'
|
30
|
+
|
31
|
+
Copyright (c) 2012 Daniel P. Clark & 6ft Dan(TM)
|
32
|
+
|
33
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
34
|
+
|
35
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
36
|
+
|
37
|
+
THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE."
|
38
|
+
# REQUIRE #
|
39
|
+
|
40
|
+
require_relative 'string_match_pci' unless defined? Match_PCI
|
41
|
+
require_relative 'string_index_all' unless defined? Index_All
|
42
|
+
|
43
|
+
# END REQUIRE #
|
44
|
+
class String
|
45
|
+
include Index_All::String
|
46
|
+
include Match_PCI::String
|
47
|
+
end
|
48
|
+
|
49
|
+
module MightyString
  # Strips HTML tags and &code; entities from a String using plain string
  # searching. Relies on String#match_pci being available on the strings it
  # is given.
  module HTML
    # Define some generic rules here ***
    # ---- COOL note: you can insert ASCII color escape code rules here... like for href then blue and for /a then plain

    # Replacement rules: when a removed tag or entity contains one of these
    # keys (case-insensitively), the mapped plain text is inserted in its
    # place.
    #
    # NOTE(review): the listing this was taken from shows the entity keys in
    # decoded form (a bare quote, a bare space, ...), so the entity spellings
    # below are reconstructed - confirm them against the released gem. Both
    # apostrophe spellings are accepted because the listing does not show
    # which one was used.
    def self.html_to_text_codes
      {
        "&quot;" => "'",
        "br" => "\n",
        "&#39;" => "'",
        "&apos;" => "'",
        "td" => " | ",
        "&nbsp;" => " ",
        "&trade;" => "(TM)",
        "&copy;" => "(c)"
      }
    end

    # Whether strip_html consults html_math_exceptions before removing a
    # block. Currently switched off.
    def self.math_by_space
      false # TODO FIXME exceptions get past a href 12-12-12
    end

    # End define generic rules ***

    # Classifies a candidate block as something that is probably not markup.
    #
    # Returns 1 when a "<" or "&" is directly followed by a space, 2 when the
    # block looks like an &code; but contains a space or is longer than 7
    # characters, and 0 otherwise.
    def self.html_math_exceptions(in_str = "")
      if in_str["< "] || in_str["& "]
        1 # Exception found at beginning
      elsif in_str["&"] && in_str[";"] && (in_str[" "] || in_str.length > 7) # Shouldn't have spaces in html &code;s or be greater than 7 in length
        2 # Exception found for both
      else
        0
      end
    end

    # strip sequence out ( master string, sequence to remove, any characters to swap inplace this for that )
    #
    # Removes the first occurrence of mseq from mstr. When mseq contains one
    # of cmpchar's keys, the value of the first such key is inserted where
    # mseq was. Returns mstr unchanged when mseq does not occur in it.
    def self.strip_first_seq(mstr = "", mseq = "", cmpchar = self.html_to_text_codes)
      pos = mstr.index(mseq)
      return mstr if pos.nil?

      # Apply at most one rule: after the first replacement the sequence may
      # no longer be present, so a second rule has nothing to act on.
      rule = cmpchar.keys.find { |mkey| mseq.match_pci(mkey) }
      filler = rule ? cmpchar[rule] : ""
      mstr[0, pos] + filler + mstr[(pos + mseq.length)..-1]
    end

    # Pick tags/blocks of string to remove (ex: "&", ";" like in "&quot;" can become "" or "'" if rules set))
    #
    # htmlstr - text to clean; it is not modified.
    # xarg    - list of [start, end] delimiter pairs, processed in order.
    #
    # Blocks are located from the right-hand end of the text, innermost
    # first. Returns the cleaned String.
    def self.strip_html(htmlstr = "", xarg = [["<", ">"], ["&", ";"]]) # xarg start, end
      xarg.each do |g|
        # An empty delimiter can never pair up; an empty start delimiter
        # would also keep the loop below from ever finishing.
        break if g[0].to_s.empty? || g[1].to_s.empty?

        sh_end = htmlstr.rindex(g[1])
        sh_start = htmlstr.rindex(g[0])
        while sh_end && sh_start
          if sh_end > sh_start
            sh_seq = htmlstr[sh_start, sh_end - sh_start + 1]
            until sh_seq.count(g[1]) == 1 # until we've selected only the inner block
              # Step back to the previous end delimiter; the search covers
              # everything before sh_end, including the character next to it.
              sh_end = htmlstr[0, sh_end].rindex(g[1])
              sh_seq = htmlstr[sh_start, sh_end - sh_start + 1]
            end
            if math_by_space && html_math_exceptions(sh_seq) != 0
              sh_end -= 1
            else
              htmlstr = strip_first_seq(htmlstr, sh_seq)
            end
          else
            # The last start delimiter has no end after it; skip over it.
            sh_start -= 1
          end
          sh_end = htmlstr[0..sh_end].rindex(g[1])
          sh_start = htmlstr[0..sh_start].rindex(g[0])
        end
      end
      htmlstr
    end

    # Prints a sample page, a separator line, and the result of strip_html
    # on that sample.
    #
    # NOTE(review): the sample below is copied as shown in the listing, which
    # may have decoded HTML entities the original sample contained.
    def self.testCase
      pagesample = "<html><body>This code primarily removes (less than)tags(greater than) and (amperstand)code(semicolon).<br>This default behavior can be modified to fit your needs.<br>4>3 doesn't pair up, so it's visible.<br>As well as this with a space 4 > 3.<br>The opposite is 3 < 4. Can you see me?<br>These following punctions don't get removed because they are out of matching order. ';and&'.<br>< This is visible because of the first space before the less than symbol. ><br>&This shows because it's longer then characters in length and has a space in it.;<br><br><table><tr><td>My Box Table</td></tr></table> <!-- <div>Old HTML commented out. This is a unique case.<br>The code finds the innermost blocks and removes them outwards. So something like '< !-- < tag >' or '< /tag > -- >' won't raise an error.<br>(I added the spaces so you can still see the ouput print.)</div> --><br><br><h1>Ruby is quite nice!</h1><br><a href='_blank'>http://www.6ftdan.com</a></body></html>"
      puts pagesample
      puts
      puts " * - * - * - Before test is above. - * - * - after striphtml follows - * - * - *"
      puts
      puts strip_html(pagesample)
    end

    # Prints the license text, with a blank line before and after it.
    def self.license
      notice = [
        "Mighty_String::Strip_HTML is licensed under 'The MIT License (MIT)'",
        "Copyright (c) 2012 Daniel P. Clark & 6ft Dan(TM)",
        "Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:",
        "The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.",
        "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE."
      ].join("\n\n")
      puts
      puts notice
      puts
    end
  end # module HTML
end # MightyString
|