arquivo 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +77 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +2 -0
- data/arquivo.gemspec +39 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/arquivo +7 -0
- data/lib/arquivo.rb +56 -0
- data/lib/arquivo/dir.rb +121 -0
- data/lib/arquivo/extrato.rb +130 -0
- data/lib/arquivo/jpg.rb +77 -0
- data/lib/arquivo/pdf.rb +118 -0
- data/lib/arquivo/version.rb +5 -0
- metadata +146 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 925dc8622c67a80de14421206ce7c8ab1287785421cfa218acf84b34534a55cc
|
4
|
+
data.tar.gz: a0f7b4964c7d85f13a95e7a9dfde0d40fb7794dd523bfd208407c399450bb05d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4a15ce5b4d3514c7e9648fc6e73321387e93990b27f4065e8ec41d33d5643262764ed1bf4c956d969cdd730814db4be766b233d9e3b0a5729759c51ec381a3de
|
7
|
+
data.tar.gz: a75b8f9aa5d2da504d58770c394b8340e3c2db51c77126a10a2d20619e6080c884506758e83a18a477d6db654cb9e9e82c5f3dc35714f3e4790f6a5c6186cda2
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
arquivo (0.1.4)
|
5
|
+
fastimage (~> 2.1)
|
6
|
+
google-api-client (~> 0.34)
|
7
|
+
pdf-reader (~> 2.3)
|
8
|
+
thor (~> 0.1)
|
9
|
+
|
10
|
+
GEM
|
11
|
+
remote: https://rubygems.org/
|
12
|
+
specs:
|
13
|
+
Ascii85 (1.0.3)
|
14
|
+
addressable (2.7.0)
|
15
|
+
public_suffix (>= 2.0.2, < 5.0)
|
16
|
+
afm (0.2.2)
|
17
|
+
declarative (0.0.10)
|
18
|
+
declarative-option (0.1.0)
|
19
|
+
faraday (0.17.1)
|
20
|
+
multipart-post (>= 1.2, < 3)
|
21
|
+
fastimage (2.1.7)
|
22
|
+
google-api-client (0.36.0)
|
23
|
+
addressable (~> 2.5, >= 2.5.1)
|
24
|
+
googleauth (~> 0.9)
|
25
|
+
httpclient (>= 2.8.1, < 3.0)
|
26
|
+
mini_mime (~> 1.0)
|
27
|
+
representable (~> 3.0)
|
28
|
+
retriable (>= 2.0, < 4.0)
|
29
|
+
signet (~> 0.12)
|
30
|
+
googleauth (0.10.0)
|
31
|
+
faraday (~> 0.12)
|
32
|
+
jwt (>= 1.4, < 3.0)
|
33
|
+
memoist (~> 0.16)
|
34
|
+
multi_json (~> 1.11)
|
35
|
+
os (>= 0.9, < 2.0)
|
36
|
+
signet (~> 0.12)
|
37
|
+
hashery (2.1.2)
|
38
|
+
httpclient (2.8.3)
|
39
|
+
jwt (2.2.1)
|
40
|
+
memoist (0.16.2)
|
41
|
+
mini_mime (1.0.2)
|
42
|
+
multi_json (1.14.1)
|
43
|
+
multipart-post (2.1.1)
|
44
|
+
os (1.0.1)
|
45
|
+
pdf-reader (2.4.0)
|
46
|
+
Ascii85 (~> 1.0.0)
|
47
|
+
afm (~> 0.2.1)
|
48
|
+
hashery (~> 2.0)
|
49
|
+
ruby-rc4
|
50
|
+
ttfunk
|
51
|
+
public_suffix (4.0.1)
|
52
|
+
rake (10.5.0)
|
53
|
+
representable (3.0.4)
|
54
|
+
declarative (< 0.1.0)
|
55
|
+
declarative-option (< 0.2.0)
|
56
|
+
uber (< 0.2.0)
|
57
|
+
retriable (3.1.2)
|
58
|
+
ruby-rc4 (0.1.5)
|
59
|
+
signet (0.12.0)
|
60
|
+
addressable (~> 2.3)
|
61
|
+
faraday (~> 0.9)
|
62
|
+
jwt (>= 1.5, < 3.0)
|
63
|
+
multi_json (~> 1.10)
|
64
|
+
thor (0.20.3)
|
65
|
+
ttfunk (1.5.1)
|
66
|
+
uber (0.1.0)
|
67
|
+
|
68
|
+
PLATFORMS
|
69
|
+
ruby
|
70
|
+
|
71
|
+
DEPENDENCIES
|
72
|
+
arquivo!
|
73
|
+
bundler (~> 1.17)
|
74
|
+
rake (~> 10.0)
|
75
|
+
|
76
|
+
BUNDLED WITH
|
77
|
+
1.17.2
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2019 Hernâni Rodrigues Vaz
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# Arquivo
|
2
|
+
|
3
|
+
Processa documentos do condominio ph1341c118 para arquivo.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'arquivo'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install arquivo
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
$ arquivo dir PASTA # processa faturas/recibos/extratos/minutas
|
24
|
+
$ arquivo pdf FILE # processa extratos ou faturas
|
25
|
+
$ arquivo help [COMMAND] # Describe available commands or one specific command
|
26
|
+
|
27
|
+
## Development
|
28
|
+
|
29
|
+
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
+
|
31
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/ph1341c118/arquivo.
|
36
|
+
|
37
|
+
## License
|
38
|
+
|
39
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/arquivo.gemspec
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
require 'arquivo/version'
|
6
|
+
|
7
|
+
# Gem specification for arquivo: identity, packaged files and dependencies.
Gem::Specification.new do |spec|
  spec.name          = 'arquivo'
  spec.version       = Arquivo::VERSION
  spec.authors       = ['Hernâni Rodrigues Vaz']
  spec.email         = ['hernanirvaz@gmail.com']
  spec.homepage      = 'https://github.com/ph1341c118/arquivo'

  # Real summary/description (taken from the README) instead of the
  # `bundle gem` template placeholders.
  spec.summary       = 'Processa documentos do condominio ph1341c118 para arquivo.'
  spec.description   = 'Processa faturas, recibos, extratos e minutas ' \
                       'do condominio ph1341c118 para arquivo.'
  spec.license       = 'MIT'

  spec.metadata['homepage_uri'] = spec.homepage
  spec.metadata['yard.run']     = 'yard'

  # Package every file tracked by git except tests. `git ls-files -z`
  # separates names with NUL bytes, hence the split on "\x0".
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0")
                     .reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_development_dependency 'bundler', '~> 1.17'
  spec.add_development_dependency 'rake', '~> 10.0'

  spec.add_dependency 'fastimage', '~> 2.1'
  spec.add_dependency 'google-api-client', '~> 0.34'
  # lib/arquivo/pdf.rb does `require 'i18n'` and calls I18n.transliterate,
  # so the gem must declare it or a clean install fails with a LoadError.
  spec.add_dependency 'i18n', '~> 1.0'
  spec.add_dependency 'pdf-reader', '~> 2.3'
  spec.add_dependency 'thor', '~> 0.1'
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: starts an interactive IRB session with the gem loaded.
require 'bundler/setup'
require 'arquivo'

# Fixtures or initialization code that make experimenting easier can be added
# here. To use Pry instead of IRB, add pry to the Gemfile and replace the two
# lines below with:
#   require 'pry'
#   Pry.start

require 'irb'
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/arquivo
ADDED
data/lib/arquivo.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'fileutils'
require 'thor'
# Load the gem's own files through the load path. The previous absolute
# requires (/home/c118/ruby/arquivo/lib/...) only resolved on one machine.
require 'arquivo/version'
require 'arquivo/extrato'
require 'arquivo/pdf'
require 'arquivo/dir'
require 'arquivo/jpg'

# Processes c118 condominium documents (pdf/jpg) for archiving.
module Arquivo
  class Error < StandardError; end

  # size limit for trim attempt
  LT = 9000

  # A4 page (8.27x11.69) inches
  X4 = 8.27
  Y4 = 11.69

  # CLI to analyze/process c118 documents
  class CLI < Thor
    desc 'pdf FILE', 'processa extratos ou faturas'
    # Processes a single pdf: a combined statement holding the c118 account
    # is handed to processa_extrato, any other pdf is burst into pages.
    #
    # @param file [String] path of the pdf to process
    def pdf(file)
      # File.file? is false for a missing path, where File.ftype would raise
      return unless File.file?(file)

      f = C118pdf.new(file)
      return unless f.processa_extrato?

      # created without a shell, so names with spaces or metacharacters work
      FileUtils.mkdir_p(f.base)
      # statement contains the c118 account
      if f.extrato?
        f.processa_extrato(0)
      else
        f.split
      end
    end

    desc 'dir PASTA', 'processa faturas/recibos/extratos/minutas'
    option :fuzz, type: :numeric, default: 29,
                  desc: 'fuzziness para corte das imagens no pdf'
    option :quality, type: :numeric, default: 15,
                     desc: 'qualidade das imagens no pdf'
    # Processes every document inside a folder.
    #
    # @param fdir [String] path of the folder to process
    def dir(fdir)
      return unless File.directory?(fdir)

      # working folders used by the jpg/pdf processors
      FileUtils.mkdir_p('tmp/zip')
      C118dir.new(fdir).processa_pasta(options)
      # cleanup is intentionally left disabled: system 'rm -rf tmp'
    end
  end
end
|
data/lib/arquivo/dir.rb
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'google/apis/sheets_v4'
|
4
|
+
require 'googleauth'
|
5
|
+
require 'googleauth/stores/file_token_store'
|
6
|
+
|
7
|
+
# ordenacao c118 dos documentos
|
8
|
+
class Array
  # Returns a new array with the document names sorted the c118 way: a name
  # shaped like "<st1>-<st2>" is ordered by "<st2><st1>".
  #
  # Names that do not contain "<word>-<word>" get an empty key and therefore
  # sort first instead of raising NoMethodError.
  #
  # @return [Array] sorted copy of the receiver
  def c118_ordena
    sort_by do |doc|
      # '.*' is the only wildcard File.basename understands ('.???' is not)
      parts = File.basename(doc, '.*').match(/(\w+)-(\w+)/)
      parts ? parts[2] + parts[1] : ''
    end
  end
end
|
19
|
+
|
20
|
+
module Arquivo
  # Analyzes/processes a folder of c118 documents.
  class C118dir < Enumerator
    # @return [Enumerator] items found directly inside the folder
    attr_reader :items
    # @return [String, nil] item being processed (nil once the folder ends)
    attr_reader :item
    # @return [Hash] data (invoices/receipts) read from the c118-contas sheet
    attr_reader :dados

    # @return [String] base name of the generated archives
    attr_reader :base

    # @param pasta [String] folder holding the documents
    # @return [C118dir] c118 documents
    def initialize(pasta)
      @items = Dir.glob(File.join(pasta, '*')).each
      # Time is always loaded; Date.today needed a `require 'date'` that this
      # file never issued. Both give the same local YYYYMMDD stamp.
      @base = File.basename(pasta, File.extname(pasta)).downcase +
              Time.now.strftime('%Y%m%d')
      obter_dados(pasta)
    end

    # Loads the sheet rows for an invoice ("factura") or receipt ("recibo")
    # folder, grouped by the first word of column A. Any other folder, and
    # any failure talking to the sheet, leaves dados as an empty Hash.
    #
    # @param dir [String] folder name, used to pick the sheet
    # @return [Hash] the loaded data
    def obter_dados(dir)
      @dados = {}
      factura = /factura/i.match?(dir)
      return @dados unless factura || /recibo/i.match?(dir)

      # get data (invoices/receipts) from sheet c118-contas
      id = '1PbiMrtTtqGztZMhe3AiJbDS6NQE9o3hXebnQEFdt954'
      sh = (factura ? 'rft' : 'rrc') + '!A2:E'
      rows = c118_sheets.get_spreadsheet_values(id, sh).values || []
      @dados = rows.group_by { |k| k[0][/\w+/] }
    rescue StandardError => e
      # still best-effort, but no longer silent about why there is no data
      warn "aviso: nao consigo obter dados da sheet c118-contas: #{e.message}"
      @dados = {}
    end

    # Builds a read-only Sheets service, authorized from the stored token or,
    # when there is none, through new_credentials.
    #
    # @return [Google::Apis::SheetsV4::SheetsService] c118 sheets_v4
    def c118_sheets
      prefix = '/home/c118/c118-'
      # file obtained from console.cloud.google.com/apis OAuth 2.0 client IDs
      client = Google::Auth::ClientId.from_file("#{prefix}credentials.json")
      scope = Google::Apis::SheetsV4::AUTH_SPREADSHEETS_READONLY
      store = Google::Auth::Stores::FileTokenStore.new(file: "#{prefix}token.yaml")
      authorizer = Google::Auth::UserAuthorizer.new(client, scope, store)

      sheets = Google::Apis::SheetsV4::SheetsService.new
      sheets.client_options.application_name = 'c118-arquivo'
      sheets.authorization = authorizer.get_credentials('default') ||
                             new_credentials(authorizer, 'urn:ietf:wg:oauth:2.0:oob')
      sheets
    end

    # Starts the OAuth2 authorization: prints the URL to open and reads the
    # resulting code from standard input. The code used to be the hard-coded
    # placeholder 'codigo aqui', so the exchange could never succeed.
    #
    # @param aut [Google::Auth::UserAuthorizer] authorizer to use
    # @param oob [String] base url of the out-of-band flow
    # @return whatever get_and_store_credentials_from_code returns
    def new_credentials(aut, oob)
      puts 'Open URL and copy code after authorization',
           aut.get_authorization_url(base_url: oob)
      code = $stdin.gets.to_s.strip
      aut.get_and_store_credentials_from_code(user_id: 'default',
                                              code: code,
                                              base_url: oob)
    end

    # Advances to the next item of the folder.
    #
    # @return [String, nil] next item, nil when there are no more
    def next_item
      @item = items.next
    rescue StopIteration
      @item = nil
    end

    # Processes every item of the folder: subfolders are processed on their
    # own, files go through processa_file. The archives are built once, after
    # the last item, and only when at least one file was handled.
    #
    # Previously the walk stopped at the first subfolder and the archives
    # were rebuilt once per file while the recursion unwound.
    #
    # @param options [Hash] CLI options (:fuzz, :quality)
    def processa_pasta(options)
      processed = false
      while next_item
        # ftype does not follow symlinks, so a linked folder is not entered
        if File.ftype(item) == 'directory'
          C118dir.new(item).processa_pasta(options)
        else
          processa_file(options)
          processed = true
        end
      end
      processa_fim if processed
    end

    # Joins the stamped pdfs into <base>.pdf and packs tmp/zip/*.pdf into
    # <base>.tar.gz. Paths are quoted so folder names with spaces work.
    def processa_fim
      system "rm -f \"#{base}\".*;" \
             "pdftk tmp/stamped*.pdf cat output \"#{base}.pdf\";cd tmp/zip;" \
             "tar cf \"../../#{base}.tar\" *.pdf;" \
             "gzip --best \"../../#{base}.tar\";"
      # cleanup is intentionally left disabled: 'rm -f *.pdf ../*.pdf'
    end

    # Dispatches the current item on its extension.
    #
    # @param options [Hash] CLI options (:fuzz, :quality)
    def processa_file(options)
      case File.extname(item).downcase
      when '.mp3' then puts 'mp3'
      when '.jpg' then C118jpg.new(item).processa_jpg(options, dados)
      when '.pdf' then C118pdf.new(item).processa_pdf(options, dados)
      else
        puts "erro: #{item} so posso processar mp3, jpg, pdf"
      end
    end
  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'pdf-reader'
|
4
|
+
|
5
|
+
module Arquivo
  # Analyzes/processes a pdf: splits bank statements out of a combined pdf.
  class C118pdf < String
    # @return [String] document name
    attr_reader :file
    # @return [String] document extension (lower case)
    attr_reader :ext
    # @return [String] document base name (lower case, no extension)
    attr_reader :base

    # @return [String, nil] text of the current pdf page
    attr_reader :page
    # @return [String, nil] base name of the statement being collected
    attr_reader :nome
    # @return [String] page list of the statement, as ",1,2,3"
    attr_reader :list

    # @return [String] document key ft????/rc????/ex??0??/sc??????
    attr_reader :key
    # @return [Numeric] pdf size in bytes
    attr_reader :size

    # @return [C118jpg] scanned jpg being processed
    attr_reader :pjpg
    # @return [C118pdf] pdf being processed
    attr_reader :ppdf

    # @param fpdf [String] path of an existing pdf
    # @return [C118pdf] c118 pdf
    def initialize(fpdf)
      @file = fpdf
      @ext = File.extname(fpdf).downcase
      @base = File.basename(fpdf, File.extname(fpdf)).downcase

      @key = @base[/\w+/]
      @size = File.size(fpdf)

      @ppdf = self
    end

    # @return [String] ghostscript command line shared by every conversion
    def c118_gs
      # filter images out for scq and statements (key starts with s or e)
      fi = /^[se]/i.match?(key.to_s) ? ' -dFILTERIMAGE' : ''

      'gs -sDEVICE=pdfwrite ' \
      '-dNOPAUSE -dBATCH -dQUIET ' \
      '-sPAPERSIZE=a4 -dFIXEDMEDIA -dPDFFitPage ' \
      '-dPDFSETTINGS=/screen -dDetectDuplicateImages ' \
      '-dColorImageDownsampleThreshold=1 ' \
      '-dGrayImageDownsampleThreshold=1 ' \
      '-dMonoImageDownsampleThreshold=1' + fi
    end

    # Checks the pdf can be processed: the output folder must not exist yet,
    # the file must be a readable pdf and its first page must load. Prints
    # the reason when it cannot.
    #
    # @return [Boolean] true when processing may go ahead
    def processa_extrato?
      return true if !File.exist?(base) &&
                     File.exist?(file) && ext == '.pdf' &&
                     first_page

      if File.exist?(base)
        puts "erro: #{base} pasta ja existe"
      else
        puts "erro: #{file} nao consigo obter primeira pagina do PDF"
      end
      false
    end

    # Walks the remaining pages, collecting those that mention the c118
    # account, and writes one pdf per statement found. A statement is written
    # when the next statement starts or when the pages run out.
    #
    # Implemented as a loop so long pdfs do not grow the call stack.
    #
    # @param cnt [Integer] number of pages already counted
    def processa_extrato(cnt)
      loop do
        cnt += 1
        @list += ",#{cnt}" if c118_conta?
        break unless next_page

        faz_extrato if extrato?
      end
      faz_extrato
    end

    # @return [Boolean] true when the page opens a c118 combined statement
    def extrato?
      c118_conta? && page.match?(/extrato +combinado/i)
    end

    # Writes the collected pages to <base>/<nome>-extrato.pdf and starts a
    # new collection from the current page. Paths are quoted so names with
    # spaces work.
    def faz_extrato
      system "#{c118_gs} " \
             "-sOutputFile=\"#{base}/#{nome}-extrato.pdf\" " \
             "-sPageList=#{list[1..-1]} \"#{file}\" 1>/dev/null 2>&1"
      puts "#{nome}-extrato"
      base_extrato
    end

    # @return [Boolean] true when the page mentions the c118 account number
    def c118_conta?
      page.include?('45463760224')
    end

    # @return [Enumerator::Lazy, nil] lazy page reader, nil when the pdf
    #   cannot be opened
    def rpdf
      @rpdf ||= PDF::Reader.new(file).pages.lazy
    rescue StandardError
      @rpdf = nil
    end

    # @return [String, nil] text of the next pdf page, nil after the last one
    def next_page
      @page = rpdf.next.text
    rescue StopIteration
      @page = nil
    end

    # Resets the page list and derives the statement name from the
    # "N. <number>/<number>" marker of the current page.
    #
    # @return [String, nil, false] statement name; nil on error; false when
    #   there is no current page
    def base_extrato
      return false unless page

      @list = ''
      n = page.scan(%r{N\. *(\d+)/(\d+)}).flatten
      @nome = "ex#{n[0].to_s[/\d{2}$/]}#{n[1]}"
    rescue StandardError
      @nome = nil
    end

    # @return [String, nil, false] truthy when the first page was loaded
    def first_page
      rpdf && next_page && base_extrato
    end

    # Bursts the pdf into one file per page under <base>/. Paths are quoted,
    # matching how file is quoted in the other shell commands.
    def split
      system "pdftk \"#{file}\" burst output \"#{base}/pg%04d-#{base}.pdf\";" \
             "rm -f \"#{base}\"/*.txt"
    end
  end
end
|
data/lib/arquivo/jpg.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'fastimage'
|
4
|
+
|
5
|
+
module Arquivo
  # Analyzes/processes a jpg (scanned document).
  class C118jpg < String
    # @return [String] file name
    attr_reader :file
    # @return [String] file extension (lower case)
    attr_reader :ext
    # @return [String] file base name (lower case, no extension)
    attr_reader :base

    # @return [String] document key ft????/rc????/ex??0??/sc??????
    attr_reader :key
    # @return [Numeric] jpg size in bytes
    attr_reader :size

    # @param fjpg [String] path of an existing jpg
    # @return [C118jpg] c118 jpg
    def initialize(fjpg)
      @file = fjpg
      @ext = File.extname(fjpg).downcase
      @base = File.basename(fjpg, File.extname(fjpg)).downcase

      @key = @base[/\w+/]
      @size = File.size(fjpg)
    end

    # Trims the image, converts it to pdf and produces the final document.
    #
    # @param options [Hash] CLI options (:fuzz, :quality)
    # @param dados [Hash] sheet data indexed by document key
    def processa_jpg(options, dados)
      trim(options).jpg2pdf(options).final(dados[key])
    end

    # Tries border trims with decreasing fuzz (steps of 4) and keeps the
    # smallest result, provided its size lies between LT and the original.
    #
    # Attempts that produce no file are ignored, and with no usable attempt
    # (fuzz below 1, or convert failing every time) the original image is
    # returned instead of raising.
    #
    # @param options [Hash] CLI options; :fuzz is the starting fuzz percentage
    # @return [C118jpg] trimmed jpg, or self when trimming did not help
    def trim(options)
      f = options[:fuzz] || 0
      h = {}
      # get the smaller jpg by trimming borders as much as possible
      while f >= 1
        o = "tmp/#{base}#{f}.jpg"
        system "convert \"#{file}\" -fuzz #{f}% -trim +repage \"#{o}\""
        s = File.size?(o)
        h[f] = s if s
        f -= 4
      end
      m = h.min_by { |_, v| v }
      return self unless m && m[1].between?(LT, size)

      C118jpg.new("tmp/#{base}#{m[0]}.jpg")
    end

    # Converts the jpg to tmp/<base>.pdf, centred on an A4-shaped canvas.
    #
    # @param options [Hash] CLI options; :quality is the jpeg quality
    # @return [C118pdf] pdf produced from the jpg
    def jpg2pdf(options)
      o = "tmp/#{base}.pdf"

      # Center image on a larger canvas (with a size given by "-extent").
      x, y = scale_xy
      system "convert \"#{file}\" -units PixelsPerInch " \
             "-gravity center -extent #{x}x#{y} " \
             "-quality #{options[:quality]}% -compress jpeg -format pdf " \
             "\"#{o}\" 1>/dev/null 2>&1"

      # return the pdf produced from the jpg
      C118pdf.new(o)
    end

    # Computes the canvas size, in pixels, of an A4 page around the image.
    #
    # @return [Array<Float>] canvas width and height in pixels
    def scale_xy
      # Determine image dimensions in pixels.
      # NOTE(review): FastImage.size presumably returns nil for an unreadable
      # image, which would raise below - confirm.
      x, y = FastImage.size(file)

      # Density (in dpi) at which the image fills an A4 page; the factor 1.04
      # makes the canvas 4% larger than the image along its tighter axis.
      # The higher density is used so the image fits along both axes.
      density = [x / X4 * 1.04, y / Y4 * 1.04].max

      # Canvas dimensions in pixels: an A4 page at the calculated density.
      [X4 * density, Y4 * density]
    end
  end
end
|
data/lib/arquivo/pdf.rb
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'i18n'
|
4
|
+
I18n.config.available_locales = :pt
|
5
|
+
|
6
|
+
require 'digest'

module Arquivo
  # Analyzes/processes a pdf: compresses, renames and stamps the document.
  class C118pdf < String
    # Picks the smaller of the original pdf and a pdf rebuilt from its
    # scanned image, then produces the final document.
    #
    # @param options [Hash] CLI options (:fuzz, :quality)
    # @param dados [Hash] sheet data indexed by document key
    def processa_pdf(options, dados)
      @ppdf = pjpg.trim(options).jpg2pdf(options) if jpg?
      @ppdf = self if size < ppdf.size
      ppdf.final(dados[key])
    end

    # Stamps tmp/zip/<base>.pdf with the base name in its corner.
    #
    # @return [C118pdf] stamped pdf
    def marca
      # produce pdf with stamp
      o = "tmp/stamped-#{base[/-(\w+)/, 1]}-#{key}.pdf"
      t = '2 2 moveto /Ubuntu findfont 7 scalefont ' \
          "setfont (#{base}) show"
      system "#{c118_gs} -sOutputFile=\"tmp/stamp-#{key}.pdf\" -c \"#{t}\";" \
             "pdftk \"tmp/zip/#{base}.pdf\" stamp \"tmp/stamp-#{key}.pdf\" " \
             "output \"#{o}\""

      C118pdf.new(o)
    end

    # Renames the document from the sheet data, writes it to tmp/zip and
    # stamps it.
    #
    # @param kda [Array<Array>, nil] sheet rows of this document
    # @return [C118pdf] stamped pdf
    def final(kda)
      c118_stamp(kda)
      o = "tmp/zip/#{base}.pdf"

      if key[0] == 'r'
        # google produces a better and smaller pdf than c118_gs
        system "cp \"#{file}\" \"#{o}\""
      else
        system "#{c118_gs} -sOutputFile=\"#{o}\" \"#{file}\" 1>/dev/null 2>&1"
      end
      @ppdf = C118pdf.new(o) if File.size(o) <= size
      ppdf.marca
    end

    # Starts the new base name: <key>-<rubric>-<sha256>. Interpolation keeps
    # a missing rubric from raising TypeError.
    def base_stamp(kda)
      @base = "#{key}-#{rubrica(kda)}#{digest}"
    end

    # Appends the absolute sum of column E, zero padded to 6 digits.
    def vnum_stamp(kda)
      n = kda.sum { |e| e[4].to_i }.abs
      @base += '-' + format('%<valor>06d', valor: n)
    end

    # Appends the distinct "mb<8 digits>" references found in column A. Rows
    # without a reference are skipped; they used to leave a stray dash.
    def numb_stamp(kda)
      d = kda.map { |e| e[0].to_s[/-(mb\d{8})/, 1] }.uniq.compact.join('-')
      @base += '-' + d unless d.empty?
    end

    # Distinct values of column C: the whole cell for keys starting with 'f',
    # otherwise the 3 word characters after "<4 digits>-". Empty cells are
    # skipped instead of raising.
    #
    # @return [String] values joined by '-'
    def sfim_stamp(kda)
      kda.map do |e|
        key[0] == 'f' ? e[2] : e[2].to_s[/\d{4}-(\w{3})/, 1]
      end.uniq.compact.join('-')
    end

    # Builds the complete base name from the sheet rows; without rows only
    # the part built by base_stamp is used.
    def c118_stamp(kda)
      base_stamp(kda)
      return unless kda

      vnum_stamp(kda)
      numb_stamp(kda)
      d = sfim_stamp(kda)
      return if d.empty?

      @base += '-' + I18n.transliterate(d, locale: :pt)
                         .gsub(/[ [[:punct:]]]/, '-')
    end

    # Rubric of the document. With sheet rows it comes from the sheet (column
    # B for keys starting with 'f', first word of column D otherwise), which
    # allows documents to be re-classified; without rows it is the word after
    # the first dash of the current base name.
    #
    # @return [String, nil] rubric
    def rubrica(kda)
      return base[/-(\w+)/, 1] unless kda

      kda.map { |e| key[0] == 'f' ? e[1] : e[3].to_s[/\w+/] }
         .uniq.compact.join('-')
    end

    # SHA-256 of the file, computed in-process: same hex digest as the
    # sha256sum command, with no shell involved.
    #
    # @return [String] '-' followed by the hex digest
    def digest
      '-' + Digest::SHA256.file(file).hexdigest
    end

    # Tests for a scanned pdf: one with no extractable text. Keys starting
    # with 'r' are never treated as scanned.
    #
    # @return [C118jpg, nil, false] extracted image when the pdf is scanned
    def jpg?
      return false if key[0] == 'r'

      o = "tmp/#{base}.txt"
      # scanned pdf test (if it contains text -> not scanned)
      system "pdftotext -q -eol unix -nopgbrk \"#{file}\" \"#{o}\""
      return false if File.size?(o)

      @pjpg = extract_jpg
    end

    # Extracts the first image of the pdf as tmp/<base>3.jpg.
    #
    # @return [C118jpg, nil] the image, nil when none was extracted or it is
    #   not larger than LT
    def extract_jpg
      o = "tmp/#{base}3.jpg"

      system "pdfimages -q -j \"#{file}\" \"tmp/#{base}2\""
      # images are not always jpg
      # only the first one is used
      g = Dir.glob("tmp/#{base}2*.???")
      return if g.empty?

      system "convert \"#{g[0]}\" \"#{o}\" 1>/dev/null 2>&1"
      return unless File.size?(o).to_i > LT

      C118jpg.new(o)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: arquivo
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.4
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Hernâni Rodrigues Vaz
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-12-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.17'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.17'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: fastimage
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.1'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.1'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: google-api-client
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.34'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.34'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pdf-reader
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '2.3'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '2.3'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: thor
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.1'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0.1'
|
97
|
+
description: " Write a longer description or delete this line."
|
98
|
+
email:
|
99
|
+
- hernanirvaz@gmail.com
|
100
|
+
executables:
|
101
|
+
- arquivo
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- ".gitignore"
|
106
|
+
- Gemfile
|
107
|
+
- Gemfile.lock
|
108
|
+
- LICENSE.txt
|
109
|
+
- README.md
|
110
|
+
- Rakefile
|
111
|
+
- arquivo.gemspec
|
112
|
+
- bin/console
|
113
|
+
- bin/setup
|
114
|
+
- exe/arquivo
|
115
|
+
- lib/arquivo.rb
|
116
|
+
- lib/arquivo/dir.rb
|
117
|
+
- lib/arquivo/extrato.rb
|
118
|
+
- lib/arquivo/jpg.rb
|
119
|
+
- lib/arquivo/pdf.rb
|
120
|
+
- lib/arquivo/version.rb
|
121
|
+
homepage: https://github.com/ph1341c118/arquivo
|
122
|
+
licenses:
|
123
|
+
- MIT
|
124
|
+
metadata:
|
125
|
+
homepage_uri: https://github.com/ph1341c118/arquivo
|
126
|
+
yard.run: yard
|
127
|
+
post_install_message:
|
128
|
+
rdoc_options: []
|
129
|
+
require_paths:
|
130
|
+
- lib
|
131
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: '0'
|
136
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
|
+
requirements:
|
138
|
+
- - ">="
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '0'
|
141
|
+
requirements: []
|
142
|
+
rubygems_version: 3.0.3
|
143
|
+
signing_key:
|
144
|
+
specification_version: 4
|
145
|
+
summary: Write a short summary, because RubyGems requires one.
|
146
|
+
test_files: []
|