catalogos_sat 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/catalogos_sat.rb +252 -0
- metadata +46 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 41b11713b713805df8f7a78fb4aca19e86feae42
|
4
|
+
data.tar.gz: 80624306049fb3540ef12e781467ffabdd54b254
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 72c7dca4e2f59323b1493ef7571ae2bdc5bff10119805245a6766ce4009cb9048bad18ca55a5376578a62fb74d8789e46bdd8611ec633b161c117f4e73625046
|
7
|
+
data.tar.gz: 32b20b082ece9adb6cd410bf259e939992eaf3498a6c04023b3bb8bb32fde47983cbe2f8e58afaf99bea700c38ec3a6a5d12aadc92ef67ccca500dcf616d5432
|
@@ -0,0 +1,252 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Downloads the SAT CFDI catalog workbook (.xls) and converts every worksheet
# into a JSON file stored under the system temporary directory.
#
# Typical use:
#
#   Catalogos.new.main
#
class Catalogos
  require 'progressbar'
  require 'spreadsheet'
  require 'json'
  require 'net/http'
  require 'tmpdir' # Dir.tmpdir only exists once the 'tmpdir' stdlib is loaded

  # Default location of the catalog workbook.
  URL_EXCEL_SAT = "http://www.sat.gob.mx/informacion_fiscal/factura_electronica/Documents/catCFDI.xls"

  # Accented characters and the ASCII letter used in their place when a column
  # title is turned into a JSON key.
  REPLACEMENTS = {
    'á' => "a",
    'é' => 'e',
    'í' => 'i',
    'ó' => 'o',
    'ú' => 'u',
    'ñ' => 'n',
    'ü' => 'u'
  }

  # ETag of the workbook that was downloaded last (nil until a download succeeds
  # or the caller assigns one).
  attr_accessor :local_eTag

  # Prepares the transcoding options used for header names and clears the
  # remembered ETag.
  def initialize
    @encoding_options = {
      :invalid => :replace,           # Replace invalid byte sequences
      :replace => "",                 # Use an empty string for those replacements
      :universal_newline => true,     # Always break lines with \n
      # Characters that have no ASCII equivalent are looked up in REPLACEMENTS;
      # anything not listed there is dropped.
      :fallback => lambda { |char| REPLACEMENTS.fetch(char, "") },
    }
    # The accessor is backed by @local_eTag, so that is the variable to reset.
    @local_eTag = nil
  end

  # Downloads the workbook to "<tmpdir>/catalogo.xls" while showing a progress bar.
  #
  # @param url_excel [String] URL of the .xls file (http or https)
  # @return [true] when the download finished
  # @raise [StandardError] any network/file error, or a RuntimeError when the
  #   server does not answer with a 2xx status; the error is logged and re-raised
  def descargar(url_excel = URL_EXCEL_SAT)
    begin
      puts "Descargando archivo de Excel desde el SAT: #{url_excel}"
      uri = URI.parse(url_excel)
      destino = "#{Dir.tmpdir}/catalogo.xls"
      etag_descargado = nil

      abrir_conexion(uri) do |http|
        http.request_get(uri.request_uri) do |respuesta|
          # Never store an error page or redirect body as if it were the workbook.
          unless respuesta.is_a?(Net::HTTPSuccess)
            raise "Respuesta HTTP inesperada: #{respuesta.code} #{respuesta.message}"
          end

          # A missing Content-Length yields 0; progress updates are skipped then.
          total = respuesta['content-length'].to_i
          etag_descargado = extraer_etag(respuesta)
          pbar = ProgressBar.create(:title => "Progreso:", :format => "%t %B %p%% %E")
          descargados = 0

          # Binary mode: the workbook must be written byte for byte.
          File.open(destino, "wb") do |f|
            respuesta.read_body do |fragmento|
              descargados += fragmento.bytesize
              # Clamp to 100 in case the body is larger than the announced length.
              pbar.progress = [100 * descargados / total, 100].min if total > 0
              f.write fragmento
            end
          end
          pbar.finish
        end
      end

      # Remember the ETag only after the file is completely on disk.
      @local_eTag = etag_descargado
      puts "Descarga de Excel finalizada, guardado en #{destino}"
    rescue => e
      puts "Error al momento de descargar: #{e.message}"
      raise
    end

    return true
  end

  # Converts every worksheet of "<tmpdir>/catalogo.xls" into
  # "<tmpdir>/catalogosJSON/<sheet name>.json".
  #
  # Worksheets whose name contains "_Parte_" are treated as pieces of one
  # catalog: rows are accumulated and written once, when the sheet whose name
  # contains "_Parte_2" has been read, under the name without the suffix.
  #
  # @return [true] when all worksheets were processed
  # @raise [StandardError] when the workbook is missing or cannot be converted;
  #   the error is logged and re-raised
  def procesar
    begin
      Spreadsheet.client_encoding = 'UTF-8'

      # The workbook must have been downloaded beforehand.
      tempdir = Dir.tmpdir
      archivo = "#{tempdir}/catalogo.xls"
      raise 'El archivo de catálogos de Excel no existe o no ha sido descargado' unless File.exist?(archivo)

      final_dir = "catalogosJSON"
      salida = "#{tempdir}/#{final_dir}"
      Dir.mkdir(salida) unless File.exist?(salida)

      book = Spreadsheet.open(archivo)
      en_partes = false
      ultima_parte = false
      encabezados = []
      renglones_json = nil

      # Walk every worksheet/catalog.
      book.worksheets.count.times do |i|
        hoja = book.worksheet(i)

        puts "\n\n----------------------------------------------"
        puts "Conviertiendo a JSON hoja #{hoja.name}..."

        # Catalogs split over two sheets (e.g. postal codes).
        # TODO: assumes at most two parts and that the marker is always "_Parte_X".
        if hoja.name.include?("_Parte_")
          en_partes = true
          ultima_parte = hoja.name.include?("_Parte_2")
        end

        primer_renglon = true
        hoja.each do |row|
          # The first row only holds the catalog description.
          if primer_renglon
            primer_renglon = false
            next
          end

          texto = row.to_s
          break if texto.index("Continúa en")
          next if row.formats[0].nil?
          # Skip empty rows.
          next if texto.index("[nil")
          # The validity/version header repeats in the second part; skip it there.
          next if ultima_parte && texto.index('["Fecha inicio de vigencia", "Fecha fin de vigencia", "Versión", "Revisión"]')

          if row.formats[0].pattern_fg_color == :silver
            if renglones_json.nil?
              # First silver row: a group header, not the real column titles.
              puts "Ignorando: #{row}"
              renglones_json = []
              encabezados = []
            else
              # Second silver row: the real column titles (kept once per catalog).
              encabezados = construir_encabezados(row, hoja.name) if encabezados.empty?
              next
            end
          end

          # Data rows are only meaningful once the column titles are known.
          next if encabezados.empty?
          # A date in the first column is most likely the catalog's
          # modification date rather than a data row.
          next if row[0].class == Date

          hash_renglon = {}
          encabezados.each_with_index do |encabezado, k|
            next if encabezado.to_s == ""
            hash_renglon[encabezado] = valor_celda(row[k], hoja.name)
          end
          renglones_json << hash_renglon
        end

        # Write the JSON unless more parts of this catalog are still pending.
        if !en_partes || ultima_parte
          puts "Escribiendo archivo JSON..."
          # Build the file name without mutating the worksheet's own name.
          nombre_archivo = ultima_parte ? hoja.name.sub(/(_Parte_\d+)$/, '') : hoja.name
          File.open("#{salida}/#{nombre_archivo}.json", "w") do |f|
            # A sheet without a header row produces an empty list.
            f.write(JSON.pretty_generate(renglones_json || []))
          end
          renglones_json = nil
          en_partes = false
          ultima_parte = false
          encabezados = []
        end
      end

      puts "---------------------------------------------------------"
      puts "Se finalizó creacion de JSONs en directorio: #{salida}"
    rescue => e
      puts "Error en generacion de JSONs: #{e.message}"
      raise
    end

    return true
  end

  # Tells whether the remote workbook differs from the one already downloaded,
  # comparing ETags obtained through a HEAD request.
  #
  # @param local_eTag [String, nil] ETag to compare against; defaults to the
  #   one remembered by this instance
  # @param url_excel [String] URL of the .xls file
  # @return [Boolean] true when the ETags differ or the server sends no ETag
  def nuevo_xls?(local_eTag = nil, url_excel = URL_EXCEL_SAT)
    local_eTag = @local_eTag if local_eTag.nil?
    uri = URI.parse(url_excel)

    new_eTag = abrir_conexion(uri) do |http|
      extraer_etag(http.request_head(uri.request_uri))
    end

    # Without a remote ETag there is nothing to compare: assume it changed.
    return true if new_eTag.nil?

    new_eTag != local_eTag
  end

  # Downloads and converts the workbook when it changed on the server.
  #
  # @param local_eTag [String, nil] ETag of the copy already processed
  # @param url_excel [String] URL of the .xls file
  # @return [true]
  def main(local_eTag = nil, url_excel = URL_EXCEL_SAT)
    if nuevo_xls?(local_eTag, url_excel)
      descargar(url_excel)
      procesar
    end

    return true
  end

  private

  # Opens an HTTP(S) session honoring the URI's port and scheme and yields it.
  # Returns whatever the block returns.
  def abrir_conexion(uri, &bloque)
    Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == "https", &bloque)
  end

  # First comma-separated piece of the response's ETag header, or nil when the
  # header is absent or empty.
  def extraer_etag(respuesta)
    respuesta['etag'].to_s.split(",")[0]
  end

  # Builds the list of JSON keys from a worksheet's column-title row.
  def construir_encabezados(row, nombre_hoja)
    encabezados = []
    row.each do |col|
      # HACK: c_TasaOCuota has an untitled column; name both range columns so
      # the values line up with the headers.
      if nombre_hoja == "c_TasaOCuota"
        col = "maximo" if col.nil?
        col = "minimo" if col == "c_TasaOCuota"
      end

      next if col.nil?
      # A column whose title is contained in the sheet name is the catalog "id".
      col = "id" if nombre_hoja.index(col.to_s)
      encabezados << nombre_json(col.to_s)
    end
    encabezados
  end

  # Turns a column title into an ASCII camelCase key, following
  # https://google.github.io/styleguide/jsoncstyleguide.xml
  def nombre_json(titulo)
    # Options are splatted as keywords: Ruby 3 no longer converts a trailing
    # positional Hash into keyword options.
    nombre = titulo.encode(Encoding.find('ASCII'), **@encoding_options)
    # The title may be empty after transliteration; empty keys are skipped later.
    nombre[0] = nombre[0].chr.downcase unless nombre.empty?
    nombre.gsub(/\s(.)/) { $1.upcase }
  end

  # Converts one cell into the value stored in the JSON row.
  #
  # Formulas contribute their computed value, Floats are rendered as
  # zero-padded integers (3 digits for c_Impuesto, 2 otherwise) and anything
  # else is stringified.
  # NOTE(review): Floats are truncated with to_i, so a fractional value such as
  # 0.16 is emitted as "00" -- confirm this is intended for rate catalogs.
  def valor_celda(celda, nombre_hoja)
    return celda.value if celda.instance_of?(Spreadsheet::Formula)
    return celda.to_s unless celda.class == Float

    formato = nombre_hoja == "c_Impuesto" ? "%03d" : "%02d"
    formato % celda.to_i
  end
end
|
251
|
+
|
252
|
+
|
metadata
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: catalogos_sat
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- BambuCode
|
8
|
+
- Ricardo Trevizo
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2017-11-01 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Utilerias para generar JSONS de los catalogos del SAT en Mexico. Descarga
|
15
|
+
el archivo .xls que el sat proporciona y parsea las columnas y filas
|
16
|
+
email: hola@bambucode.com
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- lib/catalogos_sat.rb
|
22
|
+
homepage: http://rubygems.org/gems/catalogos_sat
|
23
|
+
licenses:
|
24
|
+
- MIT
|
25
|
+
metadata: {}
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
requirements: []
|
41
|
+
rubyforge_project:
|
42
|
+
rubygems_version: 2.6.12
|
43
|
+
signing_key:
|
44
|
+
specification_version: 4
|
45
|
+
summary: Script para generar JSONS de catalogos del SAT
|
46
|
+
test_files: []
|