bookclean 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 205253301963b09bd6b28ae20a5b47aa8d362ec4
4
+ data.tar.gz: 9a46b4dbe422b88fb5e68f088d6afc3d0ce56002
5
+ SHA512:
6
+ metadata.gz: 6c13b9a033c0f6ef987f3211adf7a08b64ebffb674f3354cc3f0c0157f5e7924e856ce63a5216ffa706f48cf7e0c2b68ca4e10d937d5eb022acbca71f5eb5627
7
+ data.tar.gz: e6588af3aa35f6eeff873b5f64edb0d344055ee1788758e37daf34e7b4a244f1ff253d5d8eee3343b5148b041478939226c6c84b8ba227bc519cbb23adeb58a0
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/.project ADDED
@@ -0,0 +1,13 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <projectDescription>
3
+ <name>bookclean</name>
4
+ <comment></comment>
5
+ <projects>
6
+ </projects>
7
+ <buildSpec>
8
+ </buildSpec>
9
+ <natures>
10
+ <nature>com.aptana.projects.webnature</nature>
11
+ <nature>com.aptana.ruby.core.rubynature</nature>
12
+ </natures>
13
+ </projectDescription>
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in bookclean.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Samur Araujo
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,30 @@
1
+ # Bookclean
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'bookclean'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install bookclean
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it ( https://github.com/[my-github-username]/bookclean/fork )
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
30
+
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
# Running plain `rake` executes the test suite.
desc "Run tests"
task default: :test

# Test task covering every file that matches test/test_*.rb.
Rake::TestTask.new do |t|
  # `libs` is a flat list of load-path directories, so append the entries
  # themselves instead of pushing a nested array into it.
  t.libs.concat(%w[test lib])
  t.pattern = "test/test_*.rb"
end
12
+
data/bookclean.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'bookclean/version'
5
+
6
# Gem specification for bookclean. The version number comes from
# Bookclean::VERSION (lib/bookclean/version.rb), required above.
Gem::Specification.new do |spec|
  spec.name          = "bookclean"
  spec.version       = Bookclean::VERSION
  spec.authors       = ["Samur Araujo"]
  spec.email         = ["samuraraujo@gmail.com"]
  spec.summary       = "Clean book metadata."
  spec.description   = "A library with several functions to clean book metadata in Portuguese."
  # TODO: the homepage is still blank; fill in the project URL once it exists.
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package every file tracked by git. `-z` asks git for NUL-separated
  # paths, which is why the output is split on "\x0".
  spec.files         = `git ls-files -z`.split("\x0")
  # Anything under bin/ is exposed as an executable, by base name.
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # NOTE(review): lib/bookclean.rb only requires unicode_utils; lisbn and
  # library_stdnums look unused from here -- confirm before dropping them.
  spec.add_runtime_dependency "lisbn"
  spec.add_runtime_dependency "library_stdnums"
  spec.add_runtime_dependency "unicode_utils"

  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "minitest"
end
@@ -0,0 +1,3 @@
1
# Namespace holding the gem's version number; bookclean.gemspec reads it as
# Bookclean::VERSION.
#
# NOTE(review): the library code itself is defined under `BookClean`
# (capital C) in lib/bookclean.rb, so the two module names do not match.
# This name is kept as-is because the gemspec depends on it.
module Bookclean
  # Current release of the gem. Frozen so it cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
data/lib/bookclean.rb ADDED
@@ -0,0 +1,34 @@
1
+ require "bookclean/version"
2
+ require "unicode_utils"
3
+
4
module BookClean
  # Helpers for normalising publisher names found in book metadata.
  module Publisher
    # Trailing "ltda" / "ltda." as a whole word, plus the spaces before it.
    LTDA_SUFFIX = /\s*(?<![\p{L}\p{N}])ltda\.?\z/

    # Leading "editora" as a whole word, plus the spaces after it.
    EDITORA_PREFIX = /\Aeditora(?![\p{L}\p{N}])\s*/

    # Trailing "editora" as a whole word, plus the spaces before it.
    EDITORA_SUFFIX = /\s*(?<![\p{L}\p{N}])editora\z/

    # "editora" followed by any two characters and more text, e.g.
    # "editora da mente". In that shape "editora" is part of the name.
    EDITORA_WITH_PARTICLE = /\Aeditora\s+..\s+/

    # Title-cased particles that are written in lower case inside a name.
    # Only whole words match, so "Delta" or "Dante" are left untouched.
    LOWERCASE_PARTICLES = /(?<![\p{L}\p{N}])(?:Das?|De|Dos?)(?![\p{L}\p{N}])/

    # Normalises a publisher name: trims and collapses whitespace, drops a
    # trailing "ltda"/"ltda." and a leading or trailing "editora", title-cases
    # the remainder, lower-cases the particles da/das/de/do/dos and restores
    # the tilde in "çao".
    #
    # @param str [String, nil] raw publisher name; the argument is not mutated
    # @param lang [Symbol] language of the name; accepted but currently unused
    # @return [String, nil] the cleaned name, or nil when +str+ is nil
    def self.clean(str, lang = :pt)
      return str if str.nil?

      # Lower-case first so the patterns below only have to match one casing.
      name = UnicodeUtils.downcase(str.strip).gsub(/\s+/, ' ')

      name = name.sub(LTDA_SUFFIX, '')
      # "editora da mente" must keep its leading "editora".
      name = name.sub(EDITORA_PREFIX, '') unless name =~ EDITORA_WITH_PARTICLE
      name = name.sub(EDITORA_SUFFIX, '')

      name = UnicodeUtils.titlecase(name)
      name = name.gsub(LOWERCASE_PARTICLES) { |particle| particle.downcase }
      name = name.gsub("çao", "ção")

      name.strip
    end
  end
end
@@ -0,0 +1,17 @@
1
+ require 'minitest/unit'
2
+ require 'minitest/autorun'
3
+ require 'minitest/pride'
4
+ require 'bookclean'
5
+
6
# Exercises BookClean::Publisher.clean against representative publisher names.
# Uses Minitest::Test, the Minitest 5 name for the old
# MiniTest::Unit::TestCase base class.
class BookCleanTest < Minitest::Test
  def test_publisher_clean_strips_trailing_editora
    assert_equal "Difusão", clean("difusão editora")
    assert_equal "Opção", clean("opÇÃo editora")
  end

  def test_publisher_clean_strips_leading_editora
    assert_equal "Neotrópica", clean("editora neotrópica")
  end

  def test_publisher_clean_titlecases_plain_name
    assert_equal "Ática", clean("ática")
  end

  def test_publisher_clean_strips_ltda
    assert_equal "Gold", clean("gold editora ltda")
    assert_equal "Gold", clean("gold editora ltda.")
  end

  def test_publisher_clean_keeps_editora_before_particle
    assert_equal "Editora da Mente", clean("editora da mente")
  end

  def test_publisher_clean_restores_tilde
    assert_equal "Civilização", clean("civilizaÇao editora")
  end

  private

  # Shorthand for the method under test.
  def clean(name)
    BookClean::Publisher.clean(name)
  end
end
metadata ADDED
@@ -0,0 +1,139 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bookclean
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Samur Araujo
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-07-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: lisbn
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: library_stdnums
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: unicode_utils
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.6'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.6'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: minitest
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: A library with several functions to clean book metadata in portugues.
98
+ email:
99
+ - samuraraujo@gmail.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - ".project"
106
+ - Gemfile
107
+ - LICENSE.txt
108
+ - README.md
109
+ - Rakefile
110
+ - bookclean.gemspec
111
+ - lib/bookclean.rb
112
+ - lib/bookclean/version.rb
113
+ - test/test_bookclean.rb
114
+ homepage: ''
115
+ licenses:
116
+ - MIT
117
+ metadata: {}
118
+ post_install_message:
119
+ rdoc_options: []
120
+ require_paths:
121
+ - lib
122
+ required_ruby_version: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ required_rubygems_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ requirements: []
133
+ rubyforge_project:
134
+ rubygems_version: 2.2.2
135
+ signing_key:
136
+ specification_version: 4
137
+ summary: Clean book metadata.
138
+ test_files:
139
+ - test/test_bookclean.rb