piola 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/.rspec +3 -0
- data/.rvmrc +3 -0
- data/Gemfile +8 -0
- data/LICENSE +22 -0
- data/README.md +31 -0
- data/Rakefile +2 -0
- data/lib/piola.rb +10 -0
- data/lib/piola/encoding.rb +37 -0
- data/lib/piola/html.rb +85 -0
- data/lib/piola/importance.rb +58 -0
- data/lib/piola/numbers.rb +21 -0
- data/lib/piola/parsing.rb +55 -0
- data/lib/piola/special_chars.rb +123 -0
- data/lib/piola/splitting.rb +49 -0
- data/lib/piola/sql.rb +210 -0
- data/lib/piola/uri.rb +25 -0
- data/lib/piola/version.rb +3 -0
- data/piola.gemspec +20 -0
- data/spec/encoding_spec.rb +38 -0
- data/spec/html_spec.rb +83 -0
- data/spec/importance_spec.rb +56 -0
- data/spec/numbers_spec.rb +49 -0
- data/spec/parsing_spec.rb +42 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/special_chars_spec.rb +93 -0
- data/spec/splitting_spec.rb +94 -0
- data/spec/sql_spec.rb +62 -0
- data/spec/uri_spec.rb +24 -0
- metadata +117 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rvmrc
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 David Jairala
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Piola
|
2
|
+
|
3
|
+
String extensions and quality of life methods
|
4
|
+
|
5
|
+
Provides a bunch of extensions for Strings: HTML manipulation methods, splitting, Spanish-language quality-of-life helpers, etc.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'piola'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install piola
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
For general usage tips and expectations, take a look at the tests in `spec/`
|
24
|
+
|
25
|
+
## Contributing
|
26
|
+
|
27
|
+
1. Fork it
|
28
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
29
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
30
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
31
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/lib/piola.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
module Piola

  # Encoding helpers that are mixed into String by the include statement
  # following this module.
  module Encoding

    # Relabels the receiver as ISO-8859-1 in place using
    # String#force_encoding. The bytes themselves are not transcoded.
    #
    # @return [String] the receiver
    def to_iso
      force_encoding('ISO-8859-1')
    end

    # Relabels the receiver as UTF-8 in place using String#force_encoding.
    # The bytes themselves are not transcoded.
    #
    # @return [String] the receiver
    def to_utf8
      force_encoding('UTF-8')
    end

    # Tells whether the receiver is labelled as UTF-8.
    #
    # Only the encoding name is compared; byte validity is not checked.
    # Reading the encoding name never decodes the bytes, so no rescue
    # clause is needed here.
    #
    # @return [Boolean]
    def utf8?
      encoding.name == 'UTF-8'
    end

    # Tells whether the receiver is labelled as ISO-8859-1.
    #
    # Only the encoding name is compared; byte validity is not checked.
    #
    # @return [Boolean]
    def iso?
      encoding.name == 'ISO-8859-1'
    end

  end

end

String.send :include, Piola::Encoding
|
data/lib/piola/html.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
module Piola

  # HTML clean-up helpers that are mixed into String by the include
  # statement following this module.
  module Html

    # Fragments whose presence marks a string as a probable html/style/js
    # leftover. Plain strings are matched literally (Regexp.union escapes
    # them); regexes keep their own case-insensitivity flag.
    LEFTOVER_MARKERS = Regexp.union(
      /\/\*/,
      /\*\//,
      '{',
      '}',
      /document\./i,
      /text\/javascript/i,
      /this_options/i,
      /socialwrap/i,
      /followwrap/i,
      /addtoany_list/i,
      /addto/i,
      /akocomment/i,
      /imagetransform/i,
      /warning\: mysql/i,
      /error\: mysql/i,
      '<',
      '>'
    ).freeze

    # Removes html tags, the "»" character and bracketed text.
    #
    # A single tag pass is enough: once every "<...>" span is gone, no "<"
    # that is still followed by a ">" can remain.
    #
    # NOTE(review): the second replacement is assumed to target the
    # non-breaking space (U+00A0) - confirm against the upstream source.
    #
    # @return [String] a new string; relies on String#remove_all_parenthesis
    def strip_tags
      str = gsub(/<\/?[^>]*>/, '')
      str = str.gsub('»', '')
      str = str.gsub("\u00A0", ' ')
      str.remove_all_parenthesis
    end

    # Removes html tags, leaving a newline where each tag was, and strips
    # every resulting line.
    #
    # NOTE(review): the non-breaking-space replacement carries the same
    # assumption as in #strip_tags - confirm.
    #
    # @param remove_parens [Boolean] when true, bracketed text is removed
    #   through String#remove_all_parenthesis(false)
    # @return [String]
    def strip_tags_enters(remove_parens = true)
      str = gsub(/<\/?[^>]*>/, "\n").gsub('»', '').gsub("\u00A0", ' ')
      str = str.split("\n").map { |line| line.strip }.join("\n")
      str = str.remove_all_parenthesis(false) if remove_parens
      str
    end

    # Encodes characters as named html entities.
    #
    # The htmlentities gem is required lazily so it is only needed when
    # this method (or #html_decode) is actually called.
    #
    # @return [String]
    def html_encode
      require 'htmlentities'

      HTMLEntities.new.encode(self, :named)
    end

    # Decodes html entities into characters.
    #
    # When decoding fails with "invalid byte sequence in UTF-8", the
    # receiver is transcoded from Windows-1252 to UTF-8 and decoded again.
    #
    # @return [String]
    # @raise [ArgumentError] any other ArgumentError is re-raised untouched
    def html_decode
      require 'htmlentities'

      coder = HTMLEntities.new
      begin
        coder.decode(self)
      rescue ArgumentError => e
        raise unless e.message == 'invalid byte sequence in UTF-8'

        coder.decode(encode('UTF-8', 'Windows-1252'))
      end
    end

    # Determines if a string might be an html/style/js leftover, i.e. it
    # contains any of the LEFTOVER_MARKERS fragments.
    #
    # @return [Boolean]
    def html_leftover?
      !!(self =~ LEFTOVER_MARKERS)
    end

  end

end

String.send :include, Piola::Html
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Piola

  # Word-counting and "importance" helpers that are mixed into String by
  # the include statement following this module.
  module Importance

    # Counts the whitespace-separated words in the string.
    #
    # split(' ') already treats a run of whitespace as one separator, so
    # no explicit space-collapsing pass is required.
    #
    # @return [Integer]
    def count_words
      strip.split(' ').length
    end

    # Returns the longest line of the text, stripped of surrounding
    # whitespace. When several lines share the maximum length the last one
    # wins; an empty text yields ''.
    #
    # @return [String]
    def longest_parragraph
      split("\n").reduce('') do |longest, line|
        candidate = line.strip
        candidate.length >= longest.length ? candidate : longest
      end
    end

    # Keeps only the words longer than three characters, joined by single
    # spaces.
    #
    # @return [String]
    def important_words
      strip.split(' ').reject { |word| word.length <= 3 }.join(' ')
    end

    # Returns just the most important clean words of a string.
    #
    # The text is cleaned, unquoted, stripped of tags and bracketed text
    # and downcased using helpers defined in the other Piola modules, then
    # reduced to letters only.
    #
    # @param options [Hash]
    # @option options [Boolean] :small_words keep words of 3 chars or less
    # @option options [Boolean] :leave_special keep accented characters
    # @return [String]
    def just_words(options = {})
      str = clean_text.remove_quotes.strip_tags.remove_all_parenthesis.downcase

      str = str.important_words unless options[:small_words]
      str = str.remove_special_chars unless options[:leave_special]
      str.only_letters
    end

  end

end

String.send :include, Piola::Importance
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Piola

  # Number-detection helpers that are mixed into String by the include
  # statement following this module.
  module Numbers

    # Checks if the whole string is a valid number: an optional leading
    # minus sign, digits, and optionally a dot followed by more digits.
    #
    # The pattern is anchored with \A and \z so the entire string must
    # match; line anchors (^ and $) would accept any multi-line text that
    # merely contains one numeric line.
    #
    # @return [Boolean]
    def valid_number?
      !!(self =~ /\A-?\d+(\.\d+)?\z/)
    end

    # Removes the lines of a text that consist solely of a number
    # (ignoring surrounding whitespace). Remaining lines are kept as they
    # are and re-joined with newlines.
    #
    # @return [String]
    def remove_numbers
      split("\n").reject { |line| line.strip.valid_number? }.join("\n")
    end

  end

end

String.send :include, Piola::Numbers
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'active_support/core_ext/object/blank'
|
2
|
+
|
3
|
+
module Piola

  # Text clean-up helpers that are mixed into String by the include
  # statement following this module.
  module Parsing

    # Removes bracketed text: [...], (...) and {...}, in that order.
    #
    # Each group is matched from its opening bracket to the nearest closing
    # bracket on the same line, and the removal is repeated so nested
    # groups are peeled from the inside out. Text sitting between two
    # separate groups is therefore preserved.
    #
    # @param strip [Boolean] strip surrounding whitespace from the result
    # @return [String] the text with runs of spaces collapsed to one
    def remove_all_parenthesis(strip = true)
      str = self

      [/\[[^\[\]\n]*\]/, /\([^()\n]*\)/, /\{[^{}\n]*\}/].each do |group|
        loop do
          reduced = str.gsub(group, '')
          break if reduced == str

          str = reduced
        end
      end

      str = str.strip if strip
      str.gsub(/ +/, ' ')
    end

    # Replaces line breaks with spaces: carriage returns, newlines and
    # <br> tags in any letter case, with or without a self-closing slash.
    # Runs of spaces are then collapsed and the result is stripped.
    #
    # @return [String]
    def remove_enters
      gsub(/\r|\n|<br\s*\/?>/i, ' ').gsub(/( )+/, ' ').strip
    end

    # Drops blank lines and strips the remaining ones.
    #
    # Blank detection uses present? from ActiveSupport, which this file
    # requires at its top.
    #
    # @return [String]
    def remove_extra_enters
      split("\n").select { |line| line.present? }.map { |line| line.strip }.join("\n")
    end

    # Removes single and double quote characters.
    #
    # @return [String]
    def remove_quotes
      delete("\"'")
    end

  end

end

String.send :include, Piola::Parsing
|
@@ -0,0 +1,123 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Piola

  # Helpers for accented characters and general text clean-up that are
  # mixed into String by the include statement following this module.
  module SpecialChars

    # Code points of the accented letters accepted as valid characters:
    # Á É Í Ñ Ó Ú Ü á é í ñ ó ú ü ö Ö.
    ACCENTS = [193, 201, 205, 209, 211, 218, 220, 225, 233, 237, 241, 243, 250, 252, 246, 214].freeze

    # Determines if a char is a spanish letter: a space, an ASCII letter
    # or one of ACCENTS. Only the first character of the receiver is
    # inspected (String#ord).
    #
    # @return [Boolean]
    def spanish_char?
      code = ord
      code == 32 || (code >= 65 && code <= 90) || (code >= 97 && code <= 122) || ACCENTS.include?(code)
    end

    # Removes all characters that are not pure letters or spaces.
    #
    # Commas and periods become spaces first so the words they separate
    # stay apart; runs of spaces are collapsed and the result is stripped.
    #
    # @return [String]
    def only_letters
      tr(',.', ' ').each_char.select { |char| char.spanish_char? }.join.squeeze(' ').strip
    end

    # Removes weird chars from a string, keeping printable ASCII and the
    # ACCENTS letters. Runs of spaces are collapsed and the result is
    # stripped.
    #
    # @param options [Hash]
    # @option options [Boolean] :keep_commas do not turn commas into spaces
    # @option options [Boolean] :keep_periods do not turn periods into spaces
    # @return [String]
    def clean_chars(options = {})
      str = self
      str = str.tr(',', ' ') unless options[:keep_commas]
      str = str.tr('.', ' ') unless options[:keep_periods]

      str.each_char.select { |char| char.normal_char? }.join.squeeze(' ').strip
    end

    # Determines if a char is printable ASCII (code points 32..126) or one
    # of ACCENTS. Only the first character of the receiver is inspected.
    #
    # @return [Boolean]
    def normal_char?
      code = ord
      (code >= 32 && code <= 126) || ACCENTS.include?(code)
    end

    # Converts the upper-case accented letters to their lower-case form.
    # Covers every upper-case letter listed in ACCENTS, including Ö.
    #
    # @return [String]
    def downcase_special_chars
      tr('ÁÉÍÓÚÑÜÖ', 'áéíóúñüö')
    end

    # Replaces accented letters with their unaccented ASCII counterpart.
    # Covers every letter listed in ACCENTS, including Ö and ö.
    #
    # @return [String]
    def remove_special_chars
      tr('ÁÉÍÓÚÑÜÖáéíóúñüö', 'AEIOUNUOaeiounuo')
    end

    # Cleans a text: decodes html entities, optionally removes bracketed
    # text, turns newlines and tabs into spaces, collapses runs of spaces
    # and strips the result. Relies on String#html_decode and
    # String#remove_all_parenthesis from the other Piola modules.
    #
    # @param remove_parens [Boolean] remove bracketed text when true
    # @return [String]
    def clean_text(remove_parens = true)
      str = html_decode
      str = str.remove_all_parenthesis if remove_parens
      str.tr("\n\t", ' ').squeeze(' ').strip
    end

    # Gets rid of line breaks and tabs for urls. Relies on
    # String#remove_enters from the Parsing module.
    #
    # @return [String]
    def clean_url
      remove_enters.remove_tabs
    end

    # Removes tab characters, collapses runs of spaces and strips the
    # result.
    #
    # @return [String]
    def remove_tabs
      delete("\t").squeeze(' ').strip
    end

  end

end

String.send :include, Piola::SpecialChars
|