zipdatev 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
1
+ # frozen_string_literal: true
2
+
3
require "fileutils"
require "set"

require "zip"
5
+
6
module ZipDatev
  # Main entry point for creating DATEV-compliant ZIP packages.
  #
  # A package collects multiple documents (invoices with attachments)
  # and generates a ZIP file containing document.xml, one ledger XML file
  # per document, and every attachment file.
  #
  # @example Creating a package with invoices
  #   package = ZipDatev::Package.new(
  #     generator_info: "MyCompany",
  #     generating_system: "MyApp"
  #   )
  #
  #   package.add_document(
  #     invoice: invoice,
  #     attachments: ["/path/to/invoice.pdf"],
  #     invoice_month: "2023-01",
  #     folder_name: "Eingangsrechnungen"
  #   )
  #
  #   package.build("/path/to/output.zip")
  class Package
    # Size limits as specified in DATEV documentation
    MAX_PACKAGE_SIZE = 465 * 1024 * 1024 # 465 MB
    MAX_FILE_SIZE = 20 * 1024 * 1024 # 20 MB
    MAX_DOCUMENTS = 5000

    # Matches names made only of printable ASCII (0x20-0x7E). Because of the
    # `+` quantifier an empty name does not match either.
    ASCII_FILENAME_PATTERN = /\A[\x20-\x7E]+\z/

    attr_reader :generator_info, :generating_system, :documents

    # @param generator_info [String] Passed on to the ledger XML generator
    # @param generating_system [String] Passed on to the ledger XML generator
    def initialize(generator_info:, generating_system:)
      @generator_info = generator_info
      @generating_system = generating_system
      @documents = []
    end

    # Add a document to the package.
    #
    # @param invoice [Invoice] The invoice data
    # @param attachments [Array<String>] Paths to attachment files
    # @param invoice_month [String] Invoice month in "YYYY-MM" format
    # @param folder_name [String] Folder name for filing
    # @param repository [Repository, nil] Optional repository structure
    # @return [Document] The created document
    def add_document(invoice:, attachments: [], invoice_month: nil, folder_name: nil, repository: nil)
      document = Document.new(
        invoice: invoice,
        attachments: attachments,
        invoice_month: invoice_month,
        folder_name: folder_name,
        repository: repository
      )
      @documents << document
      document
    end

    # Build the ZIP package.
    #
    # All validation (documents, XSD schemas, attachment files) runs before
    # anything is written to +output_path+.
    #
    # @param output_path [String] Path for the output ZIP file
    # @raise [PackageError] If package creation fails
    # @raise [ValidationError] If validation fails
    # @raise [SchemaValidationError] If XSD validation fails
    # @return [String] The output path
    def build(output_path)
      raise PackageError, "No documents added to package" if @documents.empty?
      raise PackageError, "Too many documents (max #{MAX_DOCUMENTS})" if @documents.size > MAX_DOCUMENTS

      # 1. Validate all documents
      validate_documents!

      # 2. Generate XMLs and validate against XSD schemas
      document_xml = generate_and_validate_document_xml
      ledger_xmls = generate_and_validate_ledger_xmls

      # 3. Validate attachment files
      validate_attachments!

      # 4. Create ZIP archive
      create_zip(output_path, document_xml, ledger_xmls)

      output_path
    end

    private

    # Validate all documents in the package, collecting every message
    # (prefixed with the 1-based document position) before raising.
    #
    # @raise [ValidationError] If any document validation fails
    def validate_documents!
      validation_errors = @documents.each_with_index.flat_map do |document, index|
        next [] if document.valid?

        document.errors.full_messages.map { |message| "Document #{index + 1}: #{message}" }
      end

      return if validation_errors.empty?

      raise ValidationError.new("Document validation failed", errors: validation_errors)
    end

    # Generate document.xml and validate against XSD schema
    #
    # @return [Nokogiri::XML::Document] The generated document.xml
    # @raise [SchemaValidationError] If XSD validation fails
    def generate_and_validate_document_xml
      doc = Generators::DocumentXml.new(package: self).generate
      SchemaValidator.validate_document_xml(doc)
      doc
    end

    # Generate ledger XML files for each document and validate against XSD schema
    #
    # @return [Array<Hash>] Array of {filename:, doc:} hashes
    # @raise [SchemaValidationError] If XSD validation fails
    def generate_and_validate_ledger_xmls
      @documents.map do |document|
        generator = Generators::LedgerXml.new(
          invoice: document.invoice,
          generator_info: @generator_info,
          generating_system: @generating_system
        )
        doc = generator.generate
        filename = generator.filename

        SchemaValidator.validate_ledger_xml(doc)

        { filename: filename, doc: doc }
      end
    end

    # Validate all attachment files
    #
    # @raise [PackageError] If any attachment fails validation
    def validate_attachments!
      @documents.each do |document|
        document.attachments.each { |path| validate_attachment_file!(path) }
      end
    end

    # Validate a single attachment file: name, existence, then size.
    #
    # @param path [String] Path to the attachment file
    # @raise [PackageError] If the name is not plain ASCII, the file is
    #   missing, or it exceeds MAX_FILE_SIZE
    def validate_attachment_file!(path)
      filename = File.basename(path)

      unless filename.match?(ASCII_FILENAME_PATTERN)
        raise PackageError, "Attachment filename contains non-ASCII characters: #{filename}"
      end

      # Report a missing attachment as a package problem instead of letting
      # File.size fail with a bare Errno::ENOENT.
      raise PackageError, "Attachment file not found: #{path}" unless File.file?(path)

      file_size = File.size(path)
      return unless file_size > MAX_FILE_SIZE

      raise PackageError,
            "Attachment file too large (#{file_size} bytes, max #{MAX_FILE_SIZE}): #{filename}"
    end

    # Create the ZIP archive
    #
    # Any failure while the archive is being filled removes the output file,
    # so no partial package is left behind.
    #
    # @param output_path [String] Path for the output ZIP file
    # @param document_xml [Nokogiri::XML::Document] The document.xml content
    # @param ledger_xmls [Array<Hash>] Array of {filename:, doc:} hashes
    # @raise [PackageError] If ZIP creation fails or package is too large
    def create_zip(output_path, document_xml, ledger_xmls)
      # Ensure output directory exists
      FileUtils.mkdir_p(File.dirname(output_path))

      # Always start from a fresh archive.
      # NOTE(review): Zip::File.open(create: true) appears to reopen an archive
      # that already exists at this path, which would keep its old entries and
      # make re-adding attachments collide; confirm against the rubyzip version in use.
      FileUtils.rm_f(output_path)

      # Track filenames to detect duplicates
      used_filenames = Set.new

      begin
        Zip::File.open(output_path, create: true) do |zipfile|
          add_xml_to_zip(zipfile, "document.xml", document_xml, used_filenames)

          ledger_xmls.each do |ledger|
            add_xml_to_zip(zipfile, ledger[:filename], ledger[:doc], used_filenames)
          end

          @documents.each do |document|
            document.attachments.each do |path|
              add_attachment_to_zip(zipfile, path, used_filenames)
            end
          end
        end
      rescue Zip::Error => e
        # Clean up partial file on error
        FileUtils.rm_f(output_path)
        raise PackageError, "Failed to create ZIP archive: #{e.message}"
      rescue StandardError
        # Duplicate names, oversize XML, I/O errors: same cleanup, but the
        # original exception is re-raised unchanged.
        FileUtils.rm_f(output_path)
        raise
      end

      validate_package_size!(output_path)
    end

    # Reject (and delete) a finished archive that exceeds MAX_PACKAGE_SIZE.
    #
    # @param output_path [String] Path of the archive that was just written
    # @raise [PackageError] If the archive is too large
    def validate_package_size!(output_path)
      package_size = File.size(output_path)
      return unless package_size > MAX_PACKAGE_SIZE

      File.delete(output_path)
      raise PackageError,
            "Package too large (#{package_size} bytes, max #{MAX_PACKAGE_SIZE})"
    end

    # Add an XML document to the ZIP file
    #
    # @param zipfile [Zip::File] The ZIP file being created
    # @param filename [String] The filename within the ZIP
    # @param xml_doc [Nokogiri::XML::Document] The XML document
    # @param used_filenames [Set] Set of already used filenames
    # @raise [PackageError] If filename is duplicate or the serialized XML
    #   exceeds MAX_FILE_SIZE
    def add_xml_to_zip(zipfile, filename, xml_doc, used_filenames)
      check_duplicate_filename!(filename, used_filenames)

      xml_string = xml_doc.to_xml(encoding: "UTF-8")

      # The per-file limit is applied to the serialized bytes, not the DOM
      if xml_string.bytesize > MAX_FILE_SIZE
        raise PackageError,
              "Generated XML too large (#{xml_string.bytesize} bytes, max #{MAX_FILE_SIZE}): #{filename}"
      end

      zipfile.get_output_stream(filename) do |stream|
        stream.write(xml_string)
      end
    end

    # Add an attachment file to the ZIP, stored flat under its basename.
    #
    # @param zipfile [Zip::File] The ZIP file being created
    # @param path [String] Path to the attachment file
    # @param used_filenames [Set] Set of already used filenames
    # @raise [PackageError] If filename is duplicate
    def add_attachment_to_zip(zipfile, path, used_filenames)
      filename = File.basename(path)
      check_duplicate_filename!(filename, used_filenames)

      zipfile.add(filename, path)
    end

    # Check for duplicate filenames and record the name as used.
    #
    # @param filename [String] The filename to check
    # @param used_filenames [Set] Set of already used filenames
    # @raise [PackageError] If filename is duplicate
    def check_duplicate_filename!(filename, used_filenames)
      raise PackageError, "Duplicate filename in package: #{filename}" if used_filenames.include?(filename)

      used_filenames.add(filename)
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
module ZipDatev
  # Three-level filing structure for DATEV documents.
  #
  # Describes where a document is filed in DATEV Unternehmen online:
  # - Level 1: Category (e.g., "Software Name")
  # - Level 2: Folder (e.g., "Belege")
  # - Level 3: Register (e.g., "2023/01")
  #
  # If no repository is specified, DATEV uses a default structure:
  # "Buchhaltung/IMPORT YYYY RE/MM Monatsname"
  #
  # @example Creating a repository structure
  #   repository = ZipDatev::Repository.new(
  #     level1: "MyApp",
  #     level2: "Eingangsrechnungen",
  #     level3: "2023/01"
  #   )
  class Repository
    # @!attribute [r] level1
    #   @return [String, nil] Level 1 - Category name
    # @!attribute [r] level2
    #   @return [String, nil] Level 2 - Folder name
    # @!attribute [r] level3
    #   @return [String, nil] Level 3 - Register name
    attr_reader :level1, :level2, :level3

    # Build a repository structure; every level is optional and stored as given.
    #
    # @param level1 [String, nil] Category name
    # @param level2 [String, nil] Folder name
    # @param level3 [String, nil] Register name
    def initialize(level1: nil, level2: nil, level3: nil)
      @level1, @level2, @level3 = level1, level2, level3
    end
  end
end
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "nokogiri"
4
+
5
module ZipDatev
  # Validates XML documents against DATEV XSD schemas.
  #
  # This validator ensures that generated XML conforms to DATEV's
  # XSD specifications before creating the ZIP package.
  #
  # Parsed schemas are cached per process, so the class-level helpers and
  # every instance share one parsed copy of each XSD.
  #
  # @example Validate document.xml using class method
  #   xml_string = "<archive>...</archive>"
  #   ZipDatev::SchemaValidator.validate_document_xml(xml_string)
  #
  # @example Validate ledger XML using class method
  #   xml_doc = generator.generate
  #   ZipDatev::SchemaValidator.validate_ledger_xml(xml_doc)
  #
  # @example Using instance methods
  #   validator = ZipDatev::SchemaValidator.new
  #   validator.validate_document_xml(doc1)
  #   validator.validate_ledger_xml(ledger1)
  #   validator.validate_ledger_xml(ledger2)
  class SchemaValidator
    # Schema file paths relative to this file
    SCHEMAS_DIR = File.join(__dir__, "schemas")
    DOCUMENT_SCHEMA_PATH = File.join(SCHEMAS_DIR, "Document_v060.xsd")
    LEDGER_SCHEMA_PATH = File.join(SCHEMAS_DIR, "Belegverwaltung_online_ledger_import_v060.xsd")

    # Serializes schema loading and guards SCHEMA_CACHE
    SCHEMA_LOAD_MUTEX = Thread::Mutex.new
    private_constant :SCHEMA_LOAD_MUTEX

    # Parsed schemas keyed by file path. Intentionally mutable; only touched
    # while holding SCHEMA_LOAD_MUTEX.
    # NOTE(review): a cached schema is shared by all callers, including across
    # threads; confirm that concurrent validation against one
    # Nokogiri::XML::Schema is acceptable for the deployment.
    SCHEMA_CACHE = {} # rubocop:disable Style/MutableConstant
    private_constant :SCHEMA_CACHE

    # Validate document.xml against Document_v060.xsd
    #
    # @param xml [String, Nokogiri::XML::Document] The XML to validate
    # @raise [SchemaValidationError] If validation fails
    # @return [true] If validation succeeds
    def self.validate_document_xml(xml)
      new.validate_document_xml(xml)
    end

    # Validate ledger XML against Belegverwaltung_online_ledger_import_v060.xsd
    #
    # @param xml [String, Nokogiri::XML::Document] The XML to validate
    # @raise [SchemaValidationError] If validation fails
    # @return [true] If validation succeeds
    def self.validate_ledger_xml(xml)
      new.validate_ledger_xml(xml)
    end

    # Validate document.xml against Document_v060.xsd
    #
    # @param xml [String, Nokogiri::XML::Document] The XML to validate
    # @raise [SchemaValidationError] If validation fails
    # @return [true] If validation succeeds
    def validate_document_xml(xml)
      validate(xml, document_schema, "Document XML")
    end

    # Validate ledger XML against Belegverwaltung_online_ledger_import_v060.xsd
    #
    # @param xml [String, Nokogiri::XML::Document] The XML to validate
    # @raise [SchemaValidationError] If validation fails
    # @return [true] If validation succeeds
    def validate_ledger_xml(xml)
      validate(xml, ledger_schema, "Ledger XML")
    end

    private

    # Lazy-load document schema
    def document_schema
      @document_schema ||= shared_schema(DOCUMENT_SCHEMA_PATH)
    end

    # Lazy-load ledger schema
    def ledger_schema
      @ledger_schema ||= shared_schema(LEDGER_SCHEMA_PATH)
    end

    # Return the process-wide parsed schema for +path+, loading it on first use.
    #
    # Without this, every class-level call (which builds a fresh instance)
    # would parse the XSD again.
    #
    # @param path [String] Path to the XSD schema file
    # @return [Nokogiri::XML::Schema] Shared schema instance
    def shared_schema(path)
      SCHEMA_LOAD_MUTEX.synchronize do
        SCHEMA_CACHE[path] ||= load_schema(path)
      end
    end

    # Load XSD schema from file path
    #
    # @param path [String] Path to the XSD schema file
    # @return [Nokogiri::XML::Schema] Loaded schema
    # @raise [PackageError] If schema file is not found
    def load_schema(path)
      raise PackageError, "Schema file not found: #{path}" unless File.exist?(path)

      # The XSD files reference siblings via relative schemaLocation values
      # (e.g. "Document_types_v060.xsd"). Passing the file path as the document
      # URL gives the parser a base to resolve them against, so the process
      # working directory no longer has to be changed with Dir.chdir.
      # NOTE(review): confirm that all bundled XSD imports resolve this way.
      xsd = Nokogiri::XML(File.read(path), path)
      Nokogiri::XML::Schema.from_document(xsd)
    end

    # Validate XML against schema
    #
    # @param xml [String, Nokogiri::XML::Document] The XML to validate
    # @param schema [Nokogiri::XML::Schema] The schema to validate against
    # @param context_name [String] Name for error messages (e.g., "Document XML")
    # @raise [SchemaValidationError] If validation fails
    # @return [true] If validation succeeds
    def validate(xml, schema, context_name)
      errors = schema.validate(parse_xml(xml))
      return true if errors.empty?

      raise SchemaValidationError.new(
        "#{context_name} validation failed",
        schema_errors: errors.map { |error| format_error(error) }
      )
    end

    # Parse XML input (string or document)
    #
    # Documents are returned untouched; anything else is parsed with the
    # strict and nonet parse options.
    #
    # @param xml [String, Nokogiri::XML::Document] The XML to parse
    # @return [Nokogiri::XML::Document] Parsed XML document
    def parse_xml(xml)
      return xml if xml.is_a?(Nokogiri::XML::Document)

      Nokogiri::XML(xml) do |config|
        config.strict.nonet
      end
    end

    # Format schema validation error, prefixing the line number when the
    # error carries a positive one.
    #
    # @param error [Nokogiri::XML::SyntaxError] The validation error
    # @return [String] Formatted error message
    def format_error(error)
      return error.message unless error.line&.positive?

      "Line #{error.line}: #{error.message}"
    end
  end
end