piola 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/.rspec +3 -0
- data/.rvmrc +3 -0
- data/Gemfile +8 -0
- data/LICENSE +22 -0
- data/README.md +31 -0
- data/Rakefile +2 -0
- data/lib/piola.rb +10 -0
- data/lib/piola/encoding.rb +37 -0
- data/lib/piola/html.rb +85 -0
- data/lib/piola/importance.rb +58 -0
- data/lib/piola/numbers.rb +21 -0
- data/lib/piola/parsing.rb +55 -0
- data/lib/piola/special_chars.rb +123 -0
- data/lib/piola/splitting.rb +49 -0
- data/lib/piola/sql.rb +210 -0
- data/lib/piola/uri.rb +25 -0
- data/lib/piola/version.rb +3 -0
- data/piola.gemspec +20 -0
- data/spec/encoding_spec.rb +38 -0
- data/spec/html_spec.rb +83 -0
- data/spec/importance_spec.rb +56 -0
- data/spec/numbers_spec.rb +49 -0
- data/spec/parsing_spec.rb +42 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/special_chars_spec.rb +93 -0
- data/spec/splitting_spec.rb +94 -0
- data/spec/sql_spec.rb +62 -0
- data/spec/uri_spec.rb +24 -0
- metadata +117 -0
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'active_support/core_ext/object/blank'
|
2
|
+
|
3
|
+
module Piola

  # String helpers that break a text into words or paragraphs.
  module Splitting

    # Splits the string into an array of words.
    #
    # Commas and periods count as separators and runs of whitespace are
    # collapsed, so the result never contains empty entries.
    #
    # options - Hash; pass :non_unique => true to keep repeated words
    #           (duplicates are dropped otherwise).
    #
    # Returns an Array of Strings.
    def to_arr(options = {})
      words = strip.tr(',.', ' ').squeeze(' ').split(' ')
      options[:non_unique] ? words : words.uniq
    end

    # Splits the string into an array of paragraphs, one per line.
    #
    # Commas and periods are replaced by spaces, carriage returns are treated
    # as line feeds, every paragraph is stripped and blank ones are dropped.
    #
    # Returns an Array of Strings.
    def string_to_parragraph_arr
      flattened = strip.tr(',.', ' ').squeeze(' ').strip.tr("\r", "\n")
      flattened.split("\n").map(&:strip).select(&:present?)
    end

    # Same as #string_to_parragraph_arr, but keeps only the distinct
    # paragraphs that have at least five words and are not flagged by
    # #html_leftover? (both predicates are defined outside this module).
    #
    # Returns an Array of Strings.
    def string_to_important_parragraph_arr
      string_to_parragraph_arr.select do |paragraph|
        paragraph.count_words >= 5 && !paragraph.html_leftover?
      end.uniq
    end

  end

end
|
48
|
+
|
49
|
+
# Make the splitting helpers available on every String instance.
String.send(:include, Piola::Splitting)
|
data/lib/piola/sql.rb
ADDED
@@ -0,0 +1,210 @@
|
|
1
|
+
require 'active_support/core_ext/object/blank'
|
2
|
+
require 'active_support/core_ext/array/extract_options'
|
3
|
+
|
4
|
+
module Piola

  # Helpers, mixed into String, that turn free-text search input into SQL
  # condition fragments built from LIKE / NOT LIKE tests.
  #
  # NOTE(review): the search text is interpolated into the SQL after quote,
  # backslash and percent characters have been deleted, and field names are
  # interpolated verbatim. This is not a substitute for bind parameters;
  # never pass untrusted field names.
  module Sql

    # Builds a condition that requires every word of the string to appear in
    # a field, for at least one of the given fields.
    #
    # fields - Array of column names (a single name is accepted too).
    #
    # Returns the condition String, or the literal '0' when there is nothing
    # to search for (presumably meant as an always-false condition).
    def sql_search(fields)
      cleaned = self.gsub(/['"\\%]/, '').tr('/:=?', ' ')
      return '0' unless cleaned.strip.present?

      terms = cleaned.to_arr
      # Text made only of separators ("." or ",") yields no terms; emitting
      # the resulting empty "()" group would produce invalid SQL.
      return '0' if terms.empty?

      rows = Array(fields).map do |field|
        cells = terms.map { |term| "(#{field} LIKE \"%#{term}%\")" }
        "(#{cells.join(' AND ')})"
      end

      rows.empty? ? '0' : rows.join(' OR ')
    end

    # Builds a search condition from a small query syntax:
    #
    #   word        the field must contain the text
    #   "a phrase"  the field must contain the exact phrase
    #   `word`      the field must contain the word followed by a space,
    #               period or comma
    #   -word, -"a phrase", -`word`   the negated forms of the above
    #
    # args - the list of column names (Array, or a single name), optionally
    #        followed by an options Hash. :operator (:and by default) joins
    #        the tests generated for one field.
    #
    # Returns the condition String, or '0' when no condition can be built.
    def searchify(*args)
      options  = args.extract_options!
      fields   = Array(args.first)
      operator = (options[:operator] || :and).to_s.upcase

      str = self.gsub(/ +/, ' ').gsub(/['\\%]/, '').tr('/:=?', ' ').strip

      return '0' unless str.present?
      # Without fields every group would be an empty "()" — invalid SQL.
      return '0' if fields.empty?

      # Collects every `opening…closing` capture, removes the delimited text
      # from str and returns the stripped captures. The delimiters and the
      # captured text are matched literally: the user's text may contain
      # regexp metacharacters such as "(" or "+".
      take_delimited = lambda do |opening, closing|
        pattern = /#{Regexp.escape(opening)}(.*?)#{Regexp.escape(closing)}/
        str.scan(pattern).map do |captures|
          inner = captures.first
          str = str.gsub("#{opening}#{inner}#{closing}", '').strip
          inner.strip
        end
      end

      # Order matters: negated forms must be consumed before the plain ones.
      exact_excludes      = take_delimited.call('-"', '"')
      exact_word_excludes = take_delimited.call('-`', '`')
      exact_words         = take_delimited.call('`', '`')
      exacts              = take_delimited.call('"', '"')

      # Unquoted excludes: first the ones followed by a space, then whatever
      # remains up to the end of a line.
      excludes = []
      [/-.+? /, /-.+?$/].each do |pattern|
        str.scan(pattern).each do |raw|
          term = raw.strip.gsub('-', '')
          str = str.gsub("-#{term}", '').strip
          excludes << term.strip
        end
      end

      # Whatever is left is a list of plain words.
      regulars = str.to_arr

      query = []

      # remove_quotes is not defined in this module; it is expected to be
      # provided on String by another part of the gem.
      if exact_words.any? || exacts.any? || regulars.any?
        rows = fields.map do |field|
          cells = exact_words.map do |matcher|
            word = matcher.remove_quotes
            "(#{field} LIKE \"% #{word} %\" OR #{field} LIKE \"% #{word}.%\" OR #{field} LIKE \"% #{word},%\")"
          end

          (exacts + regulars).each do |matcher|
            cells << "#{field} LIKE \"%#{matcher.remove_quotes}%\""
          end

          cells.join(" #{operator} ")
        end

        query << "(#{rows.join(' OR ')})"
      end

      if exact_word_excludes.any? || exact_excludes.any? || excludes.any?
        rows = fields.map do |field|
          # Negating "A OR B OR C" needs AND between the NOT LIKE tests;
          # joined with OR the group is true for almost any value and
          # excludes nothing.
          cells = exact_word_excludes.map do |matcher|
            word = matcher.remove_quotes
            "(#{field} NOT LIKE \"% #{word} %\" AND #{field} NOT LIKE \"% #{word}.%\" AND #{field} NOT LIKE \"% #{word},%\")"
          end

          (exact_excludes + excludes).each do |matcher|
            cells << "#{field} NOT LIKE \"%#{matcher.remove_quotes}%\""
          end

          cells.join(" #{operator} ")
        end

        query << "(#{rows.join(' AND ')})"
      end

      query.empty? ? '0' : query.join(' AND ')
    end

  end

end
|
209
|
+
|
210
|
+
# Make the SQL search helpers available on every String instance.
String.send(:include, Piola::Sql)
|
data/lib/piola/uri.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
module Piola

  # URI percent-encoding helpers mixed into String.
  module Uri

    # Percent-encodes the characters of the string that are unsafe in a URI.
    #
    # URI.escape was removed in Ruby 3.0. URI::DEFAULT_PARSER#escape is the
    # method it delegated to, so the result is unchanged on Rubies that
    # still had it.
    #
    # Returns the escaped String.
    def uri_escape
      require 'uri' # loaded lazily, as before

      URI::DEFAULT_PARSER.escape(self)
    end

    # Decodes the percent-encoded sequences of the string.
    #
    # Uses URI::DEFAULT_PARSER#unescape because URI.unescape was removed in
    # Ruby 3.0.
    #
    # Returns the unescaped String.
    def uri_unescape
      require 'uri' # loaded lazily, as before

      URI::DEFAULT_PARSER.unescape(self)
    end

  end

end
|
24
|
+
|
25
|
+
# Make the URI helpers available on every String instance.
String.send(:include, Piola::Uri)
|
data/piola.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/piola/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for piola.
Gem::Specification.new do |gem|
  gem.name          = "piola"
  gem.version       = Piola::VERSION
  gem.authors       = ["David Jairala"]
  gem.email         = ["davidjairala@gmail.com"]
  gem.description   = %q{String extensions and quality of life methods}
  gem.summary       = %q{Provides a bunch of extensions for Strings, HTML manipulation methods, splitting, spanish language quality of life, etc.}
  # NOTE(review): the homepage is blank — fill in the project URL.
  gem.homepage      = ""

  # NUL-separated listing: splitting on $\ (nil by default, i.e. on any
  # whitespace) breaks paths that contain spaces and depends on a global.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency 'htmlentities', ["~> 4.3.1"]
  gem.add_dependency 'activesupport', [">= 3.0.0"]
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Piola::Encoding do

  # Returns a fresh accented literal on every call, so no two expectations
  # share the same String object.
  def accented
    'ááá'
  end

  describe '#to_iso' do
    it "should correctly convert an UTF8 string to ISO" do
      accented.to_iso.should_not be_utf8
      accented.to_iso.should be_iso
    end
  end

  describe '#to_utf8' do
    it "should correctly convert an ISO string to UTF8" do
      accented.force_encoding('ISO-8859-1').to_utf8.should eql('ááá')
      accented.force_encoding('ISO-8859-1').to_utf8.should be_utf8
    end

    it "should correctly convert an UTF8 string to ISO and back" do
      'á'.to_iso.to_utf8.should eql('á')
      'á'.to_iso.to_utf8.should be_utf8
    end
  end

  describe '#utf8?' do
    it { 'foo bar baz'.should be_utf8 }
    it { accented.should be_utf8 }
  end

  describe '#iso?' do
    it { 'á'.to_iso.should be_iso }
  end

end
|
data/spec/html_spec.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Piola::Html do

  describe '#strip_tags' do

    it "should remove tags from html docs" do
      '<div id="baz_id"><p>foo bar baz.<br></p></div><div style="clear:both;"></div>'.strip_tags.should eql('foo bar baz.')
    end

    # NOTE(review): this fixture reads the same as the one above in this
    # view; if the original carried entity-escaped tags, confirm the literal.
    it "removes quoted tags" do
      '<div id="baz_id"><p>foo bar baz.<br></p></div><div style="clear:both;"></div>'.strip_tags.should eql('foo bar baz.')
    end

  end

  describe '#strip_tags_enters' do

    # Multi-line HTML fixture, one fragment per line.
    # NOTE(review): any indentation inside the original literal is not
    # visible in this view — confirm before relying on exact whitespace.
    let(:txt) do
      [
        '<div id="baz_id">',
        '<p>',
        'foo (bar) baz.',
        '<br>',
        '</p>',
        '</div><div style="clear:both;"></div>'
      ].join("\n")
    end

    it "should remove tags but leave enters in their places" do
      txt.strip_tags_enters.should eql("\n\n\n\nfoo baz.\n\n\n\n\n")
    end

    it "should not remove parenthesis when specified" do
      txt.strip_tags_enters(false).should eql("\n\n\n\nfoo (bar) baz.\n\n\n\n\n")
    end

  end

  describe '#html_encode' do

    # NOTE(review): the expected value reads the same as the input in this
    # view; if the original expected HTML entities, confirm the literal.
    it "should convert special chars to html entities" do
      'áéíñÑÁ'.html_encode.should eql('áéíñÑÁ')
    end

  end

  describe '#html_decode' do

    # Exercises html_decode: the example previously called html_encode, so
    # the method under test was never run.
    it "should convert html entities to special chars" do
      'áéíñÑÁ'.html_decode.should eql('áéíñÑÁ')
    end

  end

  describe '#html_leftover?' do

    it 'returns true if the string has any html leftovers' do
      '/* Style Definitions */ table MsoNormalTable mso-style-name:"Tabla normal"; mso-tstyle-rowband-size:0; mso-tstyle-colband-size:0; mso-style-noshow:yes; mso-style-priority:99; mso-style-parent:""; mso-padding-alt:0cm 5 4pt 0cm 5 4pt; mso-para-margin:0cm; mso-para-margin-bottom: 0001pt; mso-pagination:widow-orphan; font-size:10 0pt; font-family:"Calibri" "sans-serif"; mso-bidi-font-family:"Times New Roman";'.should be_html_leftover
    end

    it 'returns true for strings with brackets' do
      'this is {a test'.should be_html_leftover
      'this is }another test'.should be_html_leftover
      'this { is yet another } test'.should be_html_leftover
    end

    it 'returns true for strings with < or >' do
      'this is <a test'.should be_html_leftover
      'this is >another test'.should be_html_leftover
      'this < is yet another > test'.should be_html_leftover
    end

    it 'returns false if normal string' do
      'Hasta ayer no se confirmó que el mandatario vaya a ofrecer alguna rueda de prensa o visite algún medio de comunicación tampoco se conoce sobre la agenda que vaya a tener luego de culminada la grabación del programa sabatino'.should_not be_html_leftover
    end

  end

end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Piola::Importance do

  describe '#count_words' do
    it "should count the quantity of words in a string" do
      'foo bar baz'.count_words.should eql(3)
    end
  end

  describe '#longest_parragraph' do
    it "should fetch the longest parragraph from a text" do
      "this is the first parragraph\nthis is quite the longest parragraph by far as far as the eye can see\nanother for good measure".longest_parragraph.should eql('this is quite the longest parragraph by far as far as the eye can see')
    end
  end

  describe '#important_words' do
    it "should fetch the most important words from a text" do
      "Incrementar bonificación es la oferta común de precandidatos".important_words.should eql('Incrementar bonificación oferta común precandidatos')
    end
  end

  describe '#just_words' do
    it "should return just the most important clean words of a string" do
      "Incrementar bonificación es la oferta común de precandidatos (other not important stuff)".just_words.should eql('incrementar bonificacion oferta comun precandidatos')
    end

    it 'strips quotes (even weird ones)' do
      "‘assange esta peor que una prision’ \"more quotes\"".just_words.should eql('assange esta peor prision more quotes')
    end

    it 'removes words with equal or less than 3 chars' do
      "this is a test".just_words.should eql('this test')
    end

    it "doesn't remove words with 3 chars when option is passed" do
      "this is a test".just_words(small_words: true).should eql('this is a test')
    end
  end

end
|