avrotize 3.0.1__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
avrotize/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '3.0.1'
32
- __version_tuple__ = version_tuple = (3, 0, 1)
31
+ __version__ = version = '3.1.0'
32
+ __version_tuple__ = version_tuple = (3, 1, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
avrotize/avrotize.py CHANGED
@@ -38,6 +38,10 @@ def create_subparsers(subparsers, commands):
38
38
  kwargs['choices'] = arg['choices']
39
39
  if 'default' in arg:
40
40
  kwargs['default'] = arg['default']
41
+ # Handle dest for optional arguments only (positional args can't have dest)
42
+ arg_is_positional = not arg['name'].startswith('-')
43
+ if 'dest' in arg and not arg_is_positional:
44
+ kwargs['dest'] = arg['dest']
41
45
  if arg['type'] == 'bool':
42
46
  kwargs['action'] = 'store_true'
43
47
  del kwargs['type']
avrotize/avrotoiceberg.py CHANGED
@@ -5,6 +5,7 @@ import sys
5
5
  from typing import Dict, List
6
6
  import pyarrow as pa
7
7
  from pyiceberg.schema import Schema, NestedField
8
+ from pyiceberg.io.pyarrow import PyArrowFileIO, schema_to_pyarrow
8
9
  from pyiceberg.types import (
9
10
  BooleanType,
10
11
  IntegerType,
@@ -21,11 +22,74 @@ from pyiceberg.types import (
21
22
  MapType,
22
23
  StructType
23
24
  )
24
- from pyiceberg.io.pyarrow import PyArrowFileIO, schema_to_pyarrow
25
25
 
26
26
  JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | bool | int | None
27
27
 
28
28
 
29
+ def iceberg_type_to_json(iceberg_type) -> str | Dict:
30
+ """
31
+ Serialize an Iceberg type to JSON per Iceberg Table Spec Appendix C.
32
+
33
+ Primitive types are serialized as strings. Complex types (struct, list, map)
34
+ are serialized as JSON objects with their nested structure.
35
+ """
36
+ # Primitive types map to simple strings
37
+ if isinstance(iceberg_type, BooleanType):
38
+ return "boolean"
39
+ elif isinstance(iceberg_type, IntegerType):
40
+ return "int"
41
+ elif isinstance(iceberg_type, LongType):
42
+ return "long"
43
+ elif isinstance(iceberg_type, FloatType):
44
+ return "float"
45
+ elif isinstance(iceberg_type, DoubleType):
46
+ return "double"
47
+ elif isinstance(iceberg_type, StringType):
48
+ return "string"
49
+ elif isinstance(iceberg_type, BinaryType):
50
+ return "binary"
51
+ elif isinstance(iceberg_type, DateType):
52
+ return "date"
53
+ elif isinstance(iceberg_type, TimestampType):
54
+ return "timestamp"
55
+ elif isinstance(iceberg_type, DecimalType):
56
+ return f"decimal({iceberg_type.precision},{iceberg_type.scale})"
57
+ elif isinstance(iceberg_type, FixedType):
58
+ return f"fixed[{iceberg_type.length}]"
59
+ elif isinstance(iceberg_type, ListType):
60
+ return {
61
+ "type": "list",
62
+ "element-id": iceberg_type.element_id,
63
+ "element-required": iceberg_type.element_required,
64
+ "element": iceberg_type_to_json(iceberg_type.element_type)
65
+ }
66
+ elif isinstance(iceberg_type, MapType):
67
+ return {
68
+ "type": "map",
69
+ "key-id": iceberg_type.key_id,
70
+ "key": iceberg_type_to_json(iceberg_type.key_type),
71
+ "value-id": iceberg_type.value_id,
72
+ "value-required": iceberg_type.value_required,
73
+ "value": iceberg_type_to_json(iceberg_type.value_type)
74
+ }
75
+ elif isinstance(iceberg_type, StructType):
76
+ return {
77
+ "type": "struct",
78
+ "fields": [
79
+ {
80
+ "id": field.field_id,
81
+ "name": field.name,
82
+ "required": field.required,
83
+ "type": iceberg_type_to_json(field.field_type)
84
+ }
85
+ for field in iceberg_type.fields
86
+ ]
87
+ }
88
+ else:
89
+ # Fallback for unknown types
90
+ return str(iceberg_type)
91
+
92
+
29
93
  class AvroToIcebergConverter:
30
94
  """Class to convert Avro schema to Iceberg schema."""
31
95
 
@@ -42,8 +106,16 @@ class AvroToIcebergConverter:
42
106
  """Get the full name of a record type."""
43
107
  return f"{namespace}.{name}" if namespace else name
44
108
 
45
- def convert_avro_to_iceberg(self, avro_schema_path: str, avro_record_type: str, output_path: str, emit_cloudevents_columns: bool=False):
46
- """Convert an Avro schema to an Iceberg schema."""
109
+ def convert_avro_to_iceberg(self, avro_schema_path: str, avro_record_type: str, output_path: str, emit_cloudevents_columns: bool=False, output_format: str="arrow"):
110
+ """Convert an Avro schema to an Iceberg schema.
111
+
112
+ Args:
113
+ avro_schema_path: Path to the Avro schema file
114
+ avro_record_type: Record type to convert (or None for the root)
115
+ output_path: Path to write the Iceberg schema
116
+ emit_cloudevents_columns: Whether to add CloudEvents columns
117
+ output_format: Output format - 'arrow' for binary Arrow IPC (default), 'schema' for JSON
118
+ """
47
119
  schema_file = avro_schema_path
48
120
  if not schema_file:
49
121
  print("Please specify the avro schema file")
@@ -96,14 +168,32 @@ class AvroToIcebergConverter:
96
168
  ])
97
169
 
98
170
  iceberg_schema = Schema(*iceberg_fields)
99
- arrow_schema = schema_to_pyarrow(iceberg_schema)
100
- print(f"Iceberg schema created: {arrow_schema}")
171
+ print(f"Iceberg schema created: {iceberg_schema}")
101
172
 
102
- # Write to Iceberg table (for demonstration, using local file system)
103
- file_io = PyArrowFileIO()
104
- output_file = file_io.new_output("file://"+output_path)
105
- with output_file.create(overwrite=True) as f:
106
- pa.output_stream(f).write(arrow_schema.serialize().to_pybytes())
173
+ if output_format == "arrow":
174
+ # Write as binary PyArrow schema
175
+ arrow_schema = schema_to_pyarrow(iceberg_schema)
176
+ file_io = PyArrowFileIO()
177
+ output_file = file_io.new_output("file://" + output_path)
178
+ with output_file.create(overwrite=True) as f:
179
+ pa.output_stream(f).write(arrow_schema.serialize().to_pybytes())
180
+ else:
181
+ # Write Iceberg schema as spec-compliant JSON (per Iceberg Table Spec Appendix C)
182
+ schema_json = {
183
+ "type": "struct",
184
+ "schema-id": 0,
185
+ "fields": [
186
+ {
187
+ "id": field.field_id,
188
+ "name": field.name,
189
+ "required": field.required,
190
+ "type": iceberg_type_to_json(field.field_type)
191
+ }
192
+ for field in iceberg_schema.fields
193
+ ]
194
+ }
195
+ with open(output_path, "w", encoding="utf-8") as f:
196
+ json.dump(schema_json, f, indent=2)
107
197
 
108
198
  def convert_avro_type_to_iceberg_type(self, avro_type):
109
199
  """Convert an Avro type to an Iceberg type."""
@@ -203,8 +293,16 @@ class AvroToIcebergConverter:
203
293
  return StringType()
204
294
 
205
295
 
206
- def convert_avro_to_iceberg(avro_schema_path, avro_record_type, output_path, emit_cloudevents_columns=False):
207
- """Convert an Avro schema to an Iceberg schema."""
296
+ def convert_avro_to_iceberg(avro_schema_path, avro_record_type, output_path, emit_cloudevents_columns=False, output_format="arrow"):
297
+ """Convert an Avro schema to an Iceberg schema.
298
+
299
+ Args:
300
+ avro_schema_path: Path to the Avro schema file
301
+ avro_record_type: Record type to convert (or None for the root)
302
+ output_path: Path to write the Iceberg schema
303
+ emit_cloudevents_columns: Whether to add CloudEvents columns
304
+ output_format: Output format - 'arrow' for binary Arrow IPC (default), 'schema' for JSON
305
+ """
208
306
  converter = AvroToIcebergConverter()
209
307
  converter.convert_avro_to_iceberg(
210
- avro_schema_path, avro_record_type, output_path, emit_cloudevents_columns)
308
+ avro_schema_path, avro_record_type, output_path, emit_cloudevents_columns, output_format)
@@ -8,7 +8,7 @@ import { jsonObject, jsonMember, TypedJSON } from 'typedjson';
8
8
  {%- endif %}
9
9
  {%- endif %}
10
10
  {%- if avro_annotation %}
11
- import avro from 'avro-js';
11
+ import avro, { type Type } from 'avro-js';
12
12
  {%- endif %}
13
13
  {%- for import_type, import_path in imports.items() %}
14
14
  import { {{ import_type }} } from '{{ import_path }}';
@@ -22,7 +22,7 @@ import pako from 'pako';
22
22
  {%- endif %}
23
23
  export class {{ class_name }} {
24
24
  {%- if avro_annotation %}
25
- public static AvroType: avro.Type = avro.parse({{ avro_schema_json }});
25
+ public static AvroType: Type = avro.parse({{ avro_schema_json }});
26
26
  {%- endif %}
27
27
 
28
28
  {%- for field in fields %}
avrotize/avrotots.py CHANGED
@@ -500,10 +500,11 @@ class AvroToTypeScript:
500
500
  """Generate TypeScript type declaration file for avro-js module."""
501
501
  avro_js_types = '''declare module 'avro-js' {
502
502
  /**
503
- * Avro Type representation.
503
+ * Avro Type interface.
504
+ * Represents the structure of Type instances returned by avro.parse().
504
505
  * Provides methods for encoding, decoding, and validating Avro data.
505
506
  */
506
- export class Type {
507
+ export interface Type {
507
508
  /**
508
509
  * Encode a value to a Buffer.
509
510
  * @param obj - Value to encode
@@ -575,12 +576,66 @@ class AvroToTypeScript:
575
576
  }
576
577
 
577
578
  /**
578
- * Parse an Avro schema and return a Type instance.
579
- * @param schema - Schema as string or object
580
- * @param options - Parse options
581
- * @returns Type instance
579
+ * avro-js default export interface.
580
+ * This module is CommonJS, so in ESM context it only has a default export.
582
581
  */
583
- export function parse(schema: string | any, options?: any): Type;
582
+ export interface Avro {
583
+ /**
584
+ * Type class constructor.
585
+ */
586
+ Type: any;
587
+
588
+ /**
589
+ * Parse an Avro schema and return a Type instance.
590
+ * @param schema - Schema as string or object
591
+ * @param options - Parse options
592
+ * @returns Type instance
593
+ */
594
+ parse(schema: string | any, options?: any): Type;
595
+
596
+ /**
597
+ * Protocol class constructor.
598
+ */
599
+ Protocol: any;
600
+
601
+ /**
602
+ * Create a file decoder.
603
+ */
604
+ createFileDecoder(path: string, options?: any): any;
605
+
606
+ /**
607
+ * Create a file encoder.
608
+ */
609
+ createFileEncoder(path: string, schema: any, options?: any): any;
610
+
611
+ /**
612
+ * Extract file header.
613
+ */
614
+ extractFileHeader(buffer: Buffer): any;
615
+
616
+ /**
617
+ * Streams utilities.
618
+ */
619
+ streams: any;
620
+
621
+ /**
622
+ * Built-in types.
623
+ */
624
+ types: any;
625
+
626
+ /**
627
+ * Validator (deprecated).
628
+ */
629
+ Validator: any;
630
+
631
+ /**
632
+ * ProtocolValidator (deprecated).
633
+ */
634
+ ProtocolValidator: any;
635
+ }
636
+
637
+ const avro: Avro;
638
+ export default avro;
584
639
  }
585
640
  '''
586
641