pascoale 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +30 -7
- data/lib/pascoale/formatter.rb +4 -3
- data/lib/pascoale/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2307ce9b797a93d8b3e34b1713f876deccc40ca7
|
4
|
+
data.tar.gz: a245bcd4c270bdb0174310a0ca744b55937e537a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5f05925bb2a43974d600951932f5328dbae470eb592ff65925e7b7f0ccbd9a8aaadbad48414073dd697f627218c2c3ce536eb2f85edef1076f916dea72bf3f6e
|
7
|
+
data.tar.gz: fea1464a11e29dc3dccad26a248b3b6612a38d31ff829ac2880bba5f5c14b414f2313befc1e15e02e87bfb2c054586080b7622ea71b5d0137c90671623d78ce1
|
data/README.md
CHANGED
@@ -5,8 +5,10 @@ Minor utilities for text processing in **Brazilian Portuguese**.
|
|
5
5
|
I'm going to add new functions as I need them.
|
6
6
|
|
7
7
|
Currently it has:
|
8
|
-
-
|
9
|
-
-
|
8
|
+
- Simple formatting considering accents in Portuguese (upcase, downcase, capitalize);
|
9
|
+
- Title formatting, keeping prepositions and other such words in downcase;
|
10
|
+
- Variations of a word at one and two **edit distances** (Reference: http://norvig.com/spell-correct.html);
|
11
|
+
- Heuristic syllabic separation. My tests against a corpus of ~170K words show 99.36% correctness \o/.
|
10
12
|
|
11
13
|
The code is kinda slow, but I'm not worried about speed (yet).
|
12
14
|
|
@@ -28,6 +30,27 @@ Or install it yourself as:
|
|
28
30
|
|
29
31
|
## Usage
|
30
32
|
|
33
|
+
Text formatter
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
require 'pascoale'
|
37
|
+
|
38
|
+
text = Pascoale::Formatter.new('Isso é um teste de formatação')
|
39
|
+
|
40
|
+
# Basic formatting
|
41
|
+
puts text.upcase # => ISSO É UM TESTE DE FORMATAÇÃO
|
42
|
+
puts text.downcase # => isso é um teste de formatação
|
43
|
+
puts text.capitalize # => Isso é um teste de formatação
|
44
|
+
|
45
|
+
# Fancy formatting (good for titles)
|
46
|
+
puts text.as_title # => Isso É um Teste de Formatação
|
47
|
+
|
48
|
+
# Predicates
|
49
|
+
puts text.upcase.upcase? # => true
|
50
|
+
puts text.upcase.downcase? # => false
|
51
|
+
puts text.capitalize? # => true
|
52
|
+
```
|
53
|
+
|
31
54
|
Variations of a word (typos and misspelling)
|
32
55
|
|
33
56
|
```ruby
|
@@ -48,20 +71,20 @@ Syllabic separation
|
|
48
71
|
require 'pascoale'
|
49
72
|
|
50
73
|
separator = Pascoale::SyllableSeparator.new('exceção')
|
51
|
-
puts separator.separated.inspect # ["ex", "ce", "ção"]
|
74
|
+
puts separator.separated.inspect # => ["ex", "ce", "ção"]
|
52
75
|
|
53
76
|
separator = Pascoale::SyllableSeparator.new('aéreo')
|
54
|
-
puts separator.separated.inspect # ["a", "é", "re", "o"]
|
77
|
+
puts separator.separated.inspect # => ["a", "é", "re", "o"]
|
55
78
|
|
56
79
|
separator = Pascoale::SyllableSeparator.new('apneia')
|
57
|
-
puts separator.separated.inspect # ["ap", "nei", "a"]
|
80
|
+
puts separator.separated.inspect # => ["ap", "nei", "a"]
|
58
81
|
|
59
82
|
separator = Pascoale::SyllableSeparator.new('construir')
|
60
|
-
puts separator.separated.inspect # ["cons", "tru", "ir"]
|
83
|
+
puts separator.separated.inspect # => ["cons", "tru", "ir"]
|
61
84
|
|
62
85
|
# Known error :( :( :(
|
63
86
|
separator = Pascoale::SyllableSeparator.new('traidor')
|
64
|
-
puts separator.separated.inspect # ["tra", "i", "dor"] should be ["trai", "dor"]
|
87
|
+
puts separator.separated.inspect # => ["tra", "i", "dor"] should be ["trai", "dor"]
|
65
88
|
|
66
89
|
```
|
67
90
|
|
data/lib/pascoale/formatter.rb
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
module Pascoale
|
2
2
|
class Formatter
|
3
|
-
EXCEPTIONS = %(a o e da do de na no em as os das dos nas nos à com sem)
|
3
|
+
EXCEPTIONS = %(a o e da do de na no em as os das dos nas nos à com sem ao)
|
4
4
|
|
5
|
-
def initialize(text)
|
5
|
+
def initialize(text, force_downcase: EXCEPTIONS)
|
6
6
|
@text = text
|
7
|
+
@force_downcase = force_downcase
|
7
8
|
end
|
8
9
|
|
9
10
|
def as_title
|
10
11
|
def title_word(a_word)
|
11
|
-
if
|
12
|
+
if @force_downcase.include?(a_word.downcase)
|
12
13
|
a_word.downcase
|
13
14
|
else
|
14
15
|
a_word.capitalize
|
data/lib/pascoale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pascoale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ronie Uliana
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-08-
|
11
|
+
date: 2014-08-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|