piola 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/.rspec +3 -0
- data/.rvmrc +3 -0
- data/Gemfile +8 -0
- data/LICENSE +22 -0
- data/README.md +31 -0
- data/Rakefile +2 -0
- data/lib/piola.rb +10 -0
- data/lib/piola/encoding.rb +37 -0
- data/lib/piola/html.rb +85 -0
- data/lib/piola/importance.rb +58 -0
- data/lib/piola/numbers.rb +21 -0
- data/lib/piola/parsing.rb +55 -0
- data/lib/piola/special_chars.rb +123 -0
- data/lib/piola/splitting.rb +49 -0
- data/lib/piola/sql.rb +210 -0
- data/lib/piola/uri.rb +25 -0
- data/lib/piola/version.rb +3 -0
- data/piola.gemspec +20 -0
- data/spec/encoding_spec.rb +38 -0
- data/spec/html_spec.rb +83 -0
- data/spec/importance_spec.rb +56 -0
- data/spec/numbers_spec.rb +49 -0
- data/spec/parsing_spec.rb +42 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/special_chars_spec.rb +93 -0
- data/spec/splitting_spec.rb +94 -0
- data/spec/sql_spec.rb +62 -0
- data/spec/uri_spec.rb +24 -0
- metadata +117 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rvmrc
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 David Jairala
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Piola
|
2
|
+
|
3
|
+
String extensions and quality of life methods
|
4
|
+
|
5
|
+
Provides a bunch of extensions for Strings: HTML manipulation methods, splitting, Spanish-language quality-of-life helpers, etc.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'piola'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install piola
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
For general usage tips and expectations, take a look at the tests in `spec/`
|
24
|
+
|
25
|
+
## Contributing
|
26
|
+
|
27
|
+
1. Fork it
|
28
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
29
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
30
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
31
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/lib/piola.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
module Piola

  # Encoding helpers mixed into String (see the include at the bottom).
  module Encoding

    # Retags the receiver as ISO-8859-1.
    #
    # Uses String#force_encoding, so the bytes are left untouched and the
    # receiver itself is modified (a frozen string will raise).
    #
    # @return [String] the receiver
    def to_iso
      force_encoding('ISO-8859-1')
    end

    # Retags the receiver as UTF-8.
    #
    # Uses String#force_encoding, so the bytes are left untouched and the
    # receiver itself is modified (a frozen string will raise).
    #
    # @return [String] the receiver
    def to_utf8
      force_encoding('UTF-8')
    end

    # Tells whether the receiver is tagged as UTF-8.
    #
    # Only the encoding tag is inspected; the bytes are not validated.
    # Reading the tag cannot raise, so no rescue is needed here.
    #
    # @return [Boolean]
    def utf8?
      encoding.name == 'UTF-8'
    end

    # Tells whether the receiver is tagged as ISO-8859-1.
    #
    # Only the encoding tag is inspected; the bytes are not validated.
    #
    # @return [Boolean]
    def iso?
      encoding.name == 'ISO-8859-1'
    end

  end

end

String.send :include, Piola::Encoding
|
data/lib/piola/html.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
module Piola

  # HTML clean-up helpers mixed into String (see the include at the bottom).
  module Html

    # Fragments that suggest a string is leftover markup, CSS, JavaScript or
    # a database error message rather than readable text. Plain strings are
    # escaped by Regexp.union, so '{', '}', '<' and '>' match literally.
    LEFTOVER_PATTERN = Regexp.union([
      /\/\*/,
      /\*\//,
      '{',
      '}',
      /document\./i,
      /text\/javascript/i,
      /this_options/i,
      /socialwrap/i,
      /followwrap/i,
      /addtoany_list/i,
      /addto/i,
      /akocomment/i,
      /imagetransform/i,
      /warning\: mysql/i,
      /error\: mysql/i,
      '<',
      '>'
    ]).freeze

    # Remove html tags.
    #
    # Also drops the '»' character, turns non-breaking spaces into plain
    # spaces and finally removes bracketed groups through
    # remove_all_parenthesis (defined outside this module).
    #
    # @return [String] a new string; the receiver is not modified
    def strip_tags
      # One pass is enough: any '<' that is still followed by a '>' is
      # consumed by this pattern, so nothing tag-shaped can survive it.
      text = gsub(/<\/?[^>]*>/, '')
      text = text.gsub('»', '')
      text = text.gsub("\u00A0", ' ')
      text.remove_all_parenthesis
    end

    # Remove html tags, leaving a newline where each tag was.
    #
    # Every resulting line is stripped of surrounding whitespace; empty
    # lines are kept.
    #
    # @param remove_parens [Boolean] when true, bracketed groups are removed
    #   with remove_all_parenthesis(false)
    # @return [String] a new string; the receiver is not modified
    def strip_tags_enters(remove_parens = true)
      text = gsub(/<\/?[^>]*>/, "\n").gsub('»', '').gsub("\u00A0", ' ')
      text = text.split("\n").map(&:strip).join("\n")
      text = text.remove_all_parenthesis(false) if remove_parens
      text
    end

    # Chars to html: encodes the receiver using named entities.
    #
    # The htmlentities library is loaded lazily on first use.
    #
    # @return [String]
    def html_encode
      require 'htmlentities'

      HTMLEntities.new.encode(self, :named)
    end

    # Html to chars: decodes HTML entities in the receiver.
    #
    # If decoding fails with "invalid byte sequence in UTF-8", the receiver
    # is transcoded from Windows-1252 to UTF-8 and decoding is retried once.
    #
    # @return [String]
    # @raise [ArgumentError] any other ArgumentError from decoding is re-raised
    def html_decode
      require 'htmlentities'

      coder = HTMLEntities.new
      begin
        coder.decode(self)
      rescue ArgumentError => e
        raise unless e.message == 'invalid byte sequence in UTF-8'

        coder.decode(encode('UTF-8', 'Windows-1252'))
      end
    end

    # Determines if a string might be an html/style/js leftover.
    #
    # @return [Boolean] true when any LEFTOVER_PATTERN fragment occurs
    def html_leftover?
      !(self =~ LEFTOVER_PATTERN).nil?
    end

  end

end

String.send :include, Piola::Html
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Piola

  # Word-counting and "important text" helpers mixed into String
  # (see the include at the bottom).
  module Importance

    # Counts words in a string.
    #
    # Words are whitespace-separated; split(' ') already ignores leading,
    # trailing and repeated whitespace, so no pre-cleaning is needed.
    #
    # @return [Integer]
    def count_words
      split(' ').length
    end

    # Get the most important (longest) paragraph from a text.
    #
    # Paragraphs are the newline-separated lines, compared after stripping
    # surrounding whitespace. On a tie the later paragraph wins.
    #
    # @return [String] the stripped paragraph, or '' for an empty receiver
    def longest_parragraph
      split("\n").inject('') do |longest, paragraph|
        candidate = paragraph.strip
        candidate.length >= longest.length ? candidate : longest
      end
    end

    # Important words from a string: those longer than three characters,
    # joined back with single spaces.
    #
    # @return [String]
    def important_words
      split(' ').select { |word| word.length > 3 }.join(' ')
    end

    # Return just the most important clean words of a string.
    #
    # Chains clean_text, remove_quotes, strip_tags and
    # remove_all_parenthesis (all defined outside this module), then
    # downcases the result before the optional steps below.
    #
    # @param options [Hash]
    #   :small_words   - when truthy, short words are kept
    #   :leave_special - when truthy, remove_special_chars is skipped
    # @return [String]
    def just_words(options = {})
      words = clean_text.remove_quotes.strip_tags.remove_all_parenthesis.downcase

      words = words.important_words unless options[:small_words]
      words = words.remove_special_chars unless options[:leave_special]
      words.only_letters
    end

  end

end

String.send :include, Piola::Importance
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Piola

  # Number detection helpers mixed into String (see the include at the bottom).
  module Numbers

    # Checks if string is a valid number.
    #
    # Accepts an optional leading minus, one or more digits and an optional
    # decimal part ("12", "-3", "4.50"). The pattern is anchored to the whole
    # string with \A and \z; ^ and $ would match a single line, so a
    # multi-line string with one numeric line would wrongly pass.
    #
    # @return [Boolean]
    def valid_number?
      !(self =~ /\A-?\d+(\.\d+)?\z/).nil?
    end

    # Removes number-only paragraphs from a text.
    #
    # Paragraphs are the newline-separated lines; a line is dropped when,
    # after stripping surrounding whitespace, it is a valid number.
    #
    # @return [String] the remaining lines joined with "\n"
    def remove_numbers
      split("\n").reject { |paragraph| paragraph.strip.valid_number? }.join("\n")
    end

  end

end

String.send :include, Piola::Numbers
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'active_support/core_ext/object/blank'
|
2
|
+
|
3
|
+
module Piola

  # Text clean-up helpers mixed into String (see the include at the bottom).
  module Parsing

    # One pattern per bracket type, each matching an innermost group on a
    # single line (no nested opener/closer and no newline inside).
    BRACKET_GROUPS = [
      /\[[^\[\]\n]*\]/,
      /\([^()\n]*\)/,
      /\{[^{}\n]*\}/
    ].freeze

    # Remove all parenthesis types: [...], (...) and {...} groups together
    # with their content.
    #
    # Groups are removed innermost-first and repeatedly, so nested groups
    # disappear entirely while text between two separate groups is kept
    # ("Uno (1) dos (2) tres" keeps "dos"). Runs of spaces left behind are
    # collapsed to one.
    #
    # @param strip [Boolean] when true, surrounding whitespace is stripped
    # @return [String] a new string; the receiver is not modified
    def remove_all_parenthesis(strip = true)
      text = self
      BRACKET_GROUPS.each do |group|
        # Each pass removes at least one match, so this loop terminates.
        text = text.gsub(group, '') while text =~ group
      end

      text = text.strip if strip
      text.gsub(/ +/, ' ')
    end

    # Remove enters: line feeds, carriage returns and <br> tags (any case,
    # with or without the self-closing slash) become a single space.
    #
    # @return [String] the text with runs of spaces collapsed and stripped
    def remove_enters
      gsub(/\r|\n|<br\s*\/?>/i, ' ').gsub(/ +/, ' ').strip
    end

    # Removes extra enters: drops blank lines and strips the remaining ones.
    #
    # Blank detection uses present?, provided by the active_support
    # extension required at the top of this file.
    #
    # @return [String] the remaining lines joined with "\n"
    def remove_extra_enters
      split("\n").select(&:present?).map(&:strip).join("\n")
    end

    # Remove quotes: deletes every double and single quote character.
    #
    # @return [String] a new string; the receiver is not modified
    def remove_quotes
      delete("\"'")
    end

  end

end

String.send :include, Piola::Parsing
|
@@ -0,0 +1,123 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Piola

  # Spanish-oriented character helpers mixed into String
  # (see the include at the bottom).
  module SpecialChars

    # Code points of the accented letters accepted in addition to ASCII:
    # Á É Í Ñ Ó Ú Ü á é í ñ ó ú ü ö Ö.
    ACCENTS = [193, 201, 205, 209, 211, 218, 220, 225, 233, 237, 241, 243, 250, 252, 246, 214].freeze

    # Uppercase special letter => lowercase counterpart.
    SPECIAL_DOWNCASE = {
      "Á" => "á",
      "É" => "é",
      "Í" => "í",
      "Ó" => "ó",
      "Ú" => "ú",
      "Ñ" => "ñ",
      "Ü" => "ü"
    }.freeze

    # Special letter => plain ASCII letter.
    SPECIAL_TO_PLAIN = {
      "Á" => "A",
      "É" => "E",
      "Í" => "I",
      "Ó" => "O",
      "Ú" => "U",
      "Ñ" => "N",
      "Ü" => "U",
      "á" => "a",
      "é" => "e",
      "í" => "i",
      "ó" => "o",
      "ú" => "u",
      "ñ" => "n",
      "ü" => "u"
    }.freeze

    # Determines if a char is a spanish letter.
    #
    # Only the first character of the receiver is examined. A space, an
    # ASCII letter or one of ACCENTS counts; an empty receiver does not.
    #
    # @return [Boolean]
    def spanish_char?
      return false if empty? # ord raises on an empty string

      code = ord
      code == 32 || code.between?(65, 90) || code.between?(97, 122) || ACCENTS.include?(code)
    end

    # Remove all characters that are not pure letters.
    #
    # Commas and periods become spaces first, so the words around them stay
    # separated; every character failing spanish_char? is then dropped.
    #
    # @return [String] the text with runs of spaces collapsed and stripped
    def only_letters
      spaced = gsub(',', ' ').gsub('.', ' ')
      spaced.each_char.select(&:spanish_char?).join.gsub(/ +/, ' ').strip
    end

    # Removes weird chars from a string: everything failing normal_char?.
    #
    # @param options [Hash]
    #   :keep_commas  - when truthy, commas are not turned into spaces
    #   :keep_periods - when truthy, periods are not turned into spaces
    # @return [String] the text with runs of spaces collapsed and stripped
    def clean_chars(options = {})
      spaced = self
      spaced = spaced.gsub(',', ' ') unless options[:keep_commas]
      spaced = spaced.gsub('.', ' ') unless options[:keep_periods]

      spaced.each_char.select(&:normal_char?).join.gsub(/ +/, ' ').strip
    end

    # Determines if a char is printable ASCII (32..126) or one of ACCENTS.
    #
    # Only the first character of the receiver is examined; an empty
    # receiver is not a normal char.
    #
    # @return [Boolean]
    def normal_char?
      return false if empty? # ord raises on an empty string

      code = ord
      code.between?(32, 126) || ACCENTS.include?(code)
    end

    # Converts special chars to downcase (Á É Í Ó Ú Ñ Ü only; every other
    # character is left as it is).
    #
    # @return [String] a new string; the receiver is not modified
    def downcase_special_chars
      SPECIAL_DOWNCASE.inject(self) { |text, (upper, lower)| text.gsub(upper, lower) }
    end

    # Remove spanish special chars, replacing each accented letter, ñ and ü
    # (both cases) with its plain ASCII letter.
    #
    # @return [String] a new string; the receiver is not modified
    def remove_special_chars
      SPECIAL_TO_PLAIN.inject(self) { |text, (special, plain)| text.gsub(special, plain) }
    end

    # Clean text: decodes HTML entities with html_decode (defined outside
    # this module), then turns newlines and tabs into spaces.
    #
    # @param remove_parens [Boolean] when true, bracketed groups are removed
    #   with remove_all_parenthesis (defined outside this module)
    # @return [String] the text with runs of spaces collapsed and stripped
    def clean_text(remove_parens = true)
      text = html_decode
      text = text.remove_all_parenthesis if remove_parens
      text.gsub(/\n|\t/, ' ').gsub(/ +/, ' ').strip
    end

    # Get rid of all weird stuff for urls: applies remove_enters (defined
    # outside this module) and then remove_tabs.
    #
    # @return [String]
    def clean_url
      remove_enters.remove_tabs
    end

    # Remove tabs. Tabs are deleted outright, not replaced by a space.
    #
    # @return [String] the text with runs of spaces collapsed and stripped
    def remove_tabs
      delete("\t").gsub(/ +/, ' ').strip
    end

  end

end

String.send :include, Piola::SpecialChars
|