avrotize 2.21.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avrotize/__init__.py +66 -0
- avrotize/__main__.py +6 -0
- avrotize/_version.py +34 -0
- avrotize/asn1toavro.py +160 -0
- avrotize/avrotize.py +152 -0
- avrotize/avrotocpp/CMakeLists.txt.jinja +77 -0
- avrotize/avrotocpp/build.bat.jinja +7 -0
- avrotize/avrotocpp/build.sh.jinja +7 -0
- avrotize/avrotocpp/dataclass_body.jinja +108 -0
- avrotize/avrotocpp/vcpkg.json.jinja +21 -0
- avrotize/avrotocpp.py +483 -0
- avrotize/avrotocsharp/README.md.jinja +166 -0
- avrotize/avrotocsharp/class_test.cs.jinja +266 -0
- avrotize/avrotocsharp/dataclass_core.jinja +293 -0
- avrotize/avrotocsharp/enum_test.cs.jinja +20 -0
- avrotize/avrotocsharp/project.csproj.jinja +30 -0
- avrotize/avrotocsharp/project.sln.jinja +34 -0
- avrotize/avrotocsharp/run_coverage.ps1.jinja +98 -0
- avrotize/avrotocsharp/run_coverage.sh.jinja +149 -0
- avrotize/avrotocsharp/testproject.csproj.jinja +19 -0
- avrotize/avrotocsharp.py +1180 -0
- avrotize/avrotocsv.py +121 -0
- avrotize/avrotodatapackage.py +173 -0
- avrotize/avrotodb.py +1383 -0
- avrotize/avrotogo/go_enum.jinja +12 -0
- avrotize/avrotogo/go_helpers.jinja +31 -0
- avrotize/avrotogo/go_struct.jinja +151 -0
- avrotize/avrotogo/go_test.jinja +47 -0
- avrotize/avrotogo/go_union.jinja +38 -0
- avrotize/avrotogo.py +476 -0
- avrotize/avrotographql.py +197 -0
- avrotize/avrotoiceberg.py +210 -0
- avrotize/avrotojava/class_test.java.jinja +212 -0
- avrotize/avrotojava/enum_test.java.jinja +21 -0
- avrotize/avrotojava/testproject.pom.jinja +54 -0
- avrotize/avrotojava.py +2156 -0
- avrotize/avrotojs.py +250 -0
- avrotize/avrotojsons.py +481 -0
- avrotize/avrotojstruct.py +345 -0
- avrotize/avrotokusto.py +364 -0
- avrotize/avrotomd/README.md.jinja +49 -0
- avrotize/avrotomd.py +137 -0
- avrotize/avrotools.py +168 -0
- avrotize/avrotoparquet.py +208 -0
- avrotize/avrotoproto.py +359 -0
- avrotize/avrotopython/dataclass_core.jinja +241 -0
- avrotize/avrotopython/enum_core.jinja +87 -0
- avrotize/avrotopython/pyproject_toml.jinja +18 -0
- avrotize/avrotopython/test_class.jinja +97 -0
- avrotize/avrotopython/test_enum.jinja +23 -0
- avrotize/avrotopython.py +626 -0
- avrotize/avrotorust/dataclass_enum.rs.jinja +74 -0
- avrotize/avrotorust/dataclass_struct.rs.jinja +204 -0
- avrotize/avrotorust/dataclass_union.rs.jinja +105 -0
- avrotize/avrotorust.py +435 -0
- avrotize/avrotots/class_core.ts.jinja +140 -0
- avrotize/avrotots/class_test.ts.jinja +77 -0
- avrotize/avrotots/enum_core.ts.jinja +46 -0
- avrotize/avrotots/gitignore.jinja +34 -0
- avrotize/avrotots/index.ts.jinja +0 -0
- avrotize/avrotots/package.json.jinja +23 -0
- avrotize/avrotots/tsconfig.json.jinja +21 -0
- avrotize/avrotots.py +687 -0
- avrotize/avrotoxsd.py +344 -0
- avrotize/cddltostructure.py +1841 -0
- avrotize/commands.json +3496 -0
- avrotize/common.py +834 -0
- avrotize/constants.py +87 -0
- avrotize/csvtoavro.py +132 -0
- avrotize/datapackagetoavro.py +76 -0
- avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
- avrotize/dependencies/cs/net90/dependencies.csproj +29 -0
- avrotize/dependencies/go/go121/go.mod +6 -0
- avrotize/dependencies/java/jdk21/pom.xml +91 -0
- avrotize/dependencies/python/py312/requirements.txt +13 -0
- avrotize/dependencies/rust/stable/Cargo.toml +17 -0
- avrotize/dependencies/typescript/node22/package.json +16 -0
- avrotize/dependency_resolver.py +348 -0
- avrotize/dependency_version.py +432 -0
- avrotize/generic/generic.avsc +57 -0
- avrotize/jsonstoavro.py +2167 -0
- avrotize/jsonstostructure.py +2864 -0
- avrotize/jstructtoavro.py +878 -0
- avrotize/kstructtoavro.py +93 -0
- avrotize/kustotoavro.py +455 -0
- avrotize/openapitostructure.py +717 -0
- avrotize/parquettoavro.py +157 -0
- avrotize/proto2parser.py +498 -0
- avrotize/proto3parser.py +403 -0
- avrotize/prototoavro.py +382 -0
- avrotize/prototypes/any.avsc +19 -0
- avrotize/prototypes/api.avsc +106 -0
- avrotize/prototypes/duration.avsc +20 -0
- avrotize/prototypes/field_mask.avsc +18 -0
- avrotize/prototypes/struct.avsc +60 -0
- avrotize/prototypes/timestamp.avsc +20 -0
- avrotize/prototypes/type.avsc +253 -0
- avrotize/prototypes/wrappers.avsc +117 -0
- avrotize/structuretocddl.py +597 -0
- avrotize/structuretocpp/CMakeLists.txt.jinja +76 -0
- avrotize/structuretocpp/build.bat.jinja +3 -0
- avrotize/structuretocpp/build.sh.jinja +3 -0
- avrotize/structuretocpp/dataclass_body.jinja +50 -0
- avrotize/structuretocpp/vcpkg.json.jinja +11 -0
- avrotize/structuretocpp.py +697 -0
- avrotize/structuretocsharp/class_test.cs.jinja +180 -0
- avrotize/structuretocsharp/dataclass_core.jinja +156 -0
- avrotize/structuretocsharp/enum_test.cs.jinja +36 -0
- avrotize/structuretocsharp/json_structure_converters.cs.jinja +399 -0
- avrotize/structuretocsharp/program.cs.jinja +49 -0
- avrotize/structuretocsharp/project.csproj.jinja +17 -0
- avrotize/structuretocsharp/project.sln.jinja +34 -0
- avrotize/structuretocsharp/testproject.csproj.jinja +18 -0
- avrotize/structuretocsharp/tuple_converter.cs.jinja +121 -0
- avrotize/structuretocsharp.py +2295 -0
- avrotize/structuretocsv.py +365 -0
- avrotize/structuretodatapackage.py +659 -0
- avrotize/structuretodb.py +1125 -0
- avrotize/structuretogo/go_enum.jinja +12 -0
- avrotize/structuretogo/go_helpers.jinja +26 -0
- avrotize/structuretogo/go_interface.jinja +18 -0
- avrotize/structuretogo/go_struct.jinja +187 -0
- avrotize/structuretogo/go_test.jinja +70 -0
- avrotize/structuretogo.py +729 -0
- avrotize/structuretographql.py +502 -0
- avrotize/structuretoiceberg.py +355 -0
- avrotize/structuretojava/choice_core.jinja +34 -0
- avrotize/structuretojava/class_core.jinja +23 -0
- avrotize/structuretojava/enum_core.jinja +18 -0
- avrotize/structuretojava/equals_hashcode.jinja +30 -0
- avrotize/structuretojava/pom.xml.jinja +26 -0
- avrotize/structuretojava/tuple_core.jinja +49 -0
- avrotize/structuretojava.py +938 -0
- avrotize/structuretojs/class_core.js.jinja +33 -0
- avrotize/structuretojs/enum_core.js.jinja +10 -0
- avrotize/structuretojs/package.json.jinja +12 -0
- avrotize/structuretojs/test_class.js.jinja +84 -0
- avrotize/structuretojs/test_enum.js.jinja +58 -0
- avrotize/structuretojs/test_runner.js.jinja +45 -0
- avrotize/structuretojs.py +657 -0
- avrotize/structuretojsons.py +498 -0
- avrotize/structuretokusto.py +639 -0
- avrotize/structuretomd/README.md.jinja +204 -0
- avrotize/structuretomd.py +322 -0
- avrotize/structuretoproto.py +764 -0
- avrotize/structuretopython/dataclass_core.jinja +363 -0
- avrotize/structuretopython/enum_core.jinja +45 -0
- avrotize/structuretopython/map_alias.jinja +21 -0
- avrotize/structuretopython/pyproject_toml.jinja +23 -0
- avrotize/structuretopython/test_class.jinja +103 -0
- avrotize/structuretopython/test_enum.jinja +34 -0
- avrotize/structuretopython.py +799 -0
- avrotize/structuretorust/dataclass_enum.rs.jinja +63 -0
- avrotize/structuretorust/dataclass_struct.rs.jinja +121 -0
- avrotize/structuretorust/dataclass_union.rs.jinja +81 -0
- avrotize/structuretorust.py +714 -0
- avrotize/structuretots/class_core.ts.jinja +78 -0
- avrotize/structuretots/enum_core.ts.jinja +6 -0
- avrotize/structuretots/gitignore.jinja +8 -0
- avrotize/structuretots/index.ts.jinja +1 -0
- avrotize/structuretots/package.json.jinja +39 -0
- avrotize/structuretots/test_class.ts.jinja +35 -0
- avrotize/structuretots/tsconfig.json.jinja +21 -0
- avrotize/structuretots.py +740 -0
- avrotize/structuretoxsd.py +679 -0
- avrotize/xsdtoavro.py +413 -0
- avrotize-2.21.1.dist-info/METADATA +1319 -0
- avrotize-2.21.1.dist-info/RECORD +171 -0
- avrotize-2.21.1.dist-info/WHEEL +4 -0
- avrotize-2.21.1.dist-info/entry_points.txt +3 -0
- avrotize-2.21.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
"""
|
|
3
|
+
Module to convert Parquet schema to Avro schema.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import pyarrow as pa
|
|
9
|
+
import pyarrow.parquet as pq
|
|
10
|
+
|
|
11
|
+
from avrotize.common import avro_name
|
|
12
|
+
|
|
13
|
+
class ParquetToAvroConverter:
    """
    Class to convert Parquet schema to Avro schema.

    The Parquet file is opened with pyarrow, every column of its Arrow schema
    is mapped to an Avro field, and the resulting record schema is written
    out as JSON.
    """

    def __init__(self, parquet_file_path, avro_schema_path, namespace=""):
        """
        Initialize the converter with file paths and namespace.

        :param parquet_file_path: Path to the Parquet file.
        :param avro_schema_path: Path to save the Avro schema file.
        :param namespace: Namespace for Avro records.
        """
        self.parquet_file_path = parquet_file_path
        self.avro_schema_path = avro_schema_path
        self.namespace = namespace

    def convert(self):
        """
        Convert Parquet schema to Avro schema and save to file.

        The record name is derived from the Parquet file name (the part of
        the base name before the first dot), passed through ``avro_name``.
        """
        parquet_table = pq.read_table(self.parquet_file_path)
        schema = parquet_table.schema

        # Infer the name of the schema from the parquet file name
        schema_name = avro_name(os.path.basename(self.parquet_file_path).split(".")[0])

        avro_schema = {
            "type": "record",
            "name": schema_name,
            "namespace": self.namespace,
            "fields": [
                self.convert_parquet_field_to_avro_field(field) for field in schema
            ]
        }

        with open(self.avro_schema_path, "w", encoding="utf-8") as file:
            json.dump(avro_schema, file, indent=2)

    def convert_parquet_field_to_avro_field(self, field):
        """
        Convert a Parquet field to an Avro field.

        A ``description`` entry in the field metadata, when present, becomes
        the Avro ``doc`` attribute.

        :param field: Parquet field to convert.
        :return: Avro field as a dictionary.
        """
        avro_type = self.convert_parquet_type_to_avro_type(field.type, field.name)
        avro_field = {
            "name": field.name,
            "type": avro_type
        }
        if field.metadata and b'description' in field.metadata:
            avro_field["doc"] = field.metadata[b'description'].decode("utf-8")
        return avro_field

    def convert_parquet_type_to_avro_type(self, parquet_type, field_name):
        """
        Convert a Parquet type to an Avro type.

        Types without a dedicated mapping fall back to ``"string"``.

        :param parquet_type: Parquet type to convert.
        :param field_name: Name of the field being converted; used to name
            the Avro record generated for a struct type.
        :return: Avro type as a string or dictionary.
        """
        # Built per call so the class can be imported before pyarrow is used.
        primitive_checks = (
            (pa.types.is_int8, "int"),
            (pa.types.is_int16, "int"),
            (pa.types.is_int32, "int"),
            (pa.types.is_int64, "long"),
            (pa.types.is_uint8, "int"),
            (pa.types.is_uint16, "int"),
            # uint32 does not fit into a signed 32-bit Avro int.
            (pa.types.is_uint32, "long"),
            (pa.types.is_uint64, "long"),
            (pa.types.is_float32, "float"),
            (pa.types.is_float64, "double"),
            (pa.types.is_boolean, "boolean"),
            (pa.types.is_binary, "bytes"),
            # Without this check large_binary would hit the "string" fallback.
            (pa.types.is_large_binary, "bytes"),
            (pa.types.is_string, "string"),
            (pa.types.is_large_string, "string"),
        )
        for matches, avro_type in primitive_checks:
            if matches(parquet_type):
                return avro_type

        if pa.types.is_timestamp(parquet_type):
            # Microsecond timestamps have an exact Avro counterpart; every
            # other unit keeps the previous "timestamp-millis" mapping.
            if parquet_type.unit == "us":
                return {"type": "long", "logicalType": "timestamp-micros"}
            return {"type": "long", "logicalType": "timestamp-millis"}
        if pa.types.is_date32(parquet_type):
            return {"type": "int", "logicalType": "date"}
        if pa.types.is_date64(parquet_type):
            return {"type": "long", "logicalType": "timestamp-millis"}
        if pa.types.is_list(parquet_type) or pa.types.is_large_list(parquet_type):
            return {
                "type": "array",
                "items": self.convert_parquet_type_to_avro_type(parquet_type.value_type, field_name)
            }
        if pa.types.is_map(parquet_type):
            return {
                "type": "map",
                "values": self.convert_parquet_type_to_avro_type(parquet_type.item_type, field_name)
            }
        if pa.types.is_struct(parquet_type):
            fields = [
                {
                    "name": nested_field.name,
                    "type": self.convert_parquet_type_to_avro_type(nested_field.type, nested_field.name)
                } for nested_field in parquet_type
            ]
            return {
                "type": "record",
                "name": f"{field_name}Type",
                "namespace": self.namespace,
                "fields": fields
            }
        if pa.types.is_decimal(parquet_type):
            return {
                "type": "bytes",
                "logicalType": "decimal",
                "precision": parquet_type.precision,
                "scale": parquet_type.scale
            }
        return "string"
|
|
142
|
+
|
|
143
|
+
def convert_parquet_to_avro(parquet_file_path, avro_file_path, namespace=""):
    """
    Write the Avro schema describing a Parquet file.

    :param parquet_file_path: Path to the Parquet file.
    :param avro_file_path: Path to save the Avro schema file.
    :param namespace: Namespace for Avro records.
    :raises FileNotFoundError: If ``parquet_file_path`` does not exist.
    """
    source_is_missing = not os.path.exists(parquet_file_path)
    if source_is_missing:
        raise FileNotFoundError(f"Parquet file not found: {parquet_file_path}")

    ParquetToAvroConverter(parquet_file_path, avro_file_path, namespace).convert()
|
|
157
|
+
|
avrotize/proto2parser.py
ADDED
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
|
4
|
+
# distributed with this work for additional information
|
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
|
7
|
+
# "License"); you may not use this file except in compliance
|
|
8
|
+
# with the License. You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing,
|
|
13
|
+
# software distributed under the License is distributed on an
|
|
14
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
# KIND, either express or implied. See the License for the
|
|
16
|
+
# specific language governing permissions and limitations
|
|
17
|
+
# under the License.
|
|
18
|
+
# adapted from: https://github.com/xophiix/proto2parser/
|
|
19
|
+
|
|
20
|
+
from lark import Lark, Transformer, Tree, Token
|
|
21
|
+
from collections import namedtuple
|
|
22
|
+
import typing
|
|
23
|
+
import json
|
|
24
|
+
|
|
25
|
+
# Lark grammar for proto2 files. Upper-case names are terminals, lower-case
# names are rules; ``proto`` is the start rule used by ``parse``.
# OCTALDIGIT and CAPITALLETTER are character ranges ("0".."7", "A".."Z"),
# written the same way as the "1".."9" range in DECIMALLIT.
BNF = r'''
OCTALDIGIT: "0".."7"
IDENT: ( "_" )* LETTER ( LETTER | DECIMALDIGIT | "_" )*
FULLIDENT: IDENT ( "." IDENT )*
MESSAGENAME: IDENT
ENUMNAME: IDENT
FIELDNAME: IDENT
ONEOFNAME: IDENT
MAPNAME: IDENT
SERVICENAME: IDENT
TAGNAME: IDENT
TAGVALUE: IDENT
RPCNAME: IDENT
MESSAGETYPE: [ "." ] ( IDENT "." )* MESSAGENAME
ENUMTYPE: [ "." ] ( IDENT "." )* ENUMNAME
CAPITALLETTER: "A".."Z"
GROUPNAME: CAPITALLETTER ( LETTER | DECIMALDIGIT | "_" )*

INTLIT : DECIMALLIT | OCTALLIT | HEXLIT
DECIMALLIT: ( "1".."9" ) ( DECIMALDIGIT )*
OCTALLIT : "0" ( OCTALDIGIT )*
HEXLIT : "0" ( "x" | "X" ) HEXDIGIT ( HEXDIGIT )*

FLOATLIT: ( DECIMALS "." [ DECIMALS ] [ EXPONENT ] | DECIMALS EXPONENT | "."DECIMALS [ EXPONENT ] ) | "inf" | "nan"
DECIMALS : DECIMALDIGIT ( DECIMALDIGIT )*
EXPONENT : ( "e" | "E" ) [ "+" | "-" ] DECIMALS

BOOLLIT: "true" | "false"

STRLIT: ( "'" ( CHARVALUE )* "'" ) | ( "\"" ( CHARVALUE )* "\"" )
CHARVALUE: HEXESCAPE | OCTESCAPE | CHARESCAPE | /[^\0\n\\]/
HEXESCAPE: "\\" ( "x" | "X" ) HEXDIGIT HEXDIGIT
OCTESCAPE: "\\" OCTALDIGIT OCTALDIGIT OCTALDIGIT
CHARESCAPE: "\\" ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | "\\" | "'" | "\"" )
QUOTE: "'" | "\""

EMPTYSTATEMENT: ";"

CONSTANT: FULLIDENT | ( [ "-" | "+" ] INTLIT ) | ( [ "-" | "+" ] FLOATLIT ) | STRLIT | BOOLLIT

syntax: "syntax" "=" QUOTE "proto2" QUOTE tail

import: "import" [ "weak" | "public" ] STRLIT tail

package: "package" FULLIDENT tail

option: [ comments ] "option" OPTIONNAME "=" CONSTANT tail
OPTIONNAME: ( IDENT | "(" FULLIDENT ")" ) ( "." IDENT )*

LABEL: "required" | "optional" | "repeated"
TYPE: "double" | "float" | "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" | "bytes" | MESSAGETYPE | ENUMTYPE
FIELDNUMBER: INTLIT

field: [ comments ] LABEL TYPE FIELDNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] tail
fieldoptions: fieldoption ( "," fieldoption )*
fieldoption: OPTIONNAME "=" CONSTANT

oneof: [ comments ] "oneof" ONEOFNAME "{" ( oneoffield | EMPTYSTATEMENT )* "}"
oneoffield: [ comments ] TYPE FIELDNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] tail
group: [ comments ] LABEL "group" GROUPNAME "=" FIELDNUMBER messagebody

mapfield: [ comments ] "map" "<" KEYTYPE "," TYPE ">" MAPNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] tail
KEYTYPE: "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string"

extensions: [ comments ] "extensions" ranges tail

reserved: [ comments ] "reserved" ( ranges | fieldnames ) tail
ranges: range ( "," range )*
range: INTLIT [ "to" ( INTLIT | "max" ) ]
fieldnames: FIELDNAME ( "," FIELDNAME )*

enum: [ comments ] "enum" ENUMNAME enumbody
enumbody: "{" ( option | enumfield | reserved | EMPTYSTATEMENT )* "}"
enumfield: [ comments ] IDENT "=" [ "-" ] INTLIT [ "[" enumvalueoption ( "," enumvalueoption )* "]" ] tail
enumvalueoption: OPTIONNAME "=" CONSTANT

message: [ comments ] "message" MESSAGENAME messagebody
messagebody: "{" ( field | enum | message | extend | extensions | option | oneof | mapfield | reserved | group | EMPTYSTATEMENT )* "}"
extend: [ comments ] "extend" MESSAGETYPE "{" (field | group)* "}"

service: [ comments ] "service" SERVICENAME "{" ( option | rpc | EMPTYSTATEMENT )* "}"
rpc: [ comments ] "rpc" RPCNAME "(" [ "stream" ] MESSAGETYPE ")" "returns" "(" [ "stream" ] MESSAGETYPE ")" ( ( "{" ( option | EMPTYSTATEMENT )* "}" ) | ";" )

proto:[ comments ] [ syntax ] ( import | package | option | topleveldef | EMPTYSTATEMENT )*
topleveldef: message | enum | extend | service

tail: ";" /[\s|\t]/* [ trail_comment ] NEWLINE
trail_comment: COMMENT
COMMENT: "//" /[^\n]/*
BLOCKCOMMENT: "/*" /./* "*/"
comments: (( COMMENT | BLOCKCOMMENT ) NEWLINE )+

%import common.HEXDIGIT
%import common.DIGIT -> DECIMALDIGIT
%import common.LETTER
%import common.WS
%import common.NEWLINE
%ignore WS
'''
|
|
124
|
+
|
|
125
|
+
class Tail(typing.NamedTuple):
    """End of a statement, carrying the trailing comment if there was one."""
    comment: 'Comment'


class Comment(typing.NamedTuple):
    """Comment text with the ``@key=value`` tags extracted from it."""
    content: str
    tags: typing.Dict[str, typing.Any]
    ue_specifiers: str


class Oneof(typing.NamedTuple):
    """A ``oneof`` group and its member fields."""
    comment: 'Comment'
    name: str
    fields: typing.List['Field']


class FieldOption(typing.NamedTuple):
    """A single ``name = constant`` option attached to a field."""
    name: str
    content: str


class Field(typing.NamedTuple):
    """A message field, map field, oneof member or enum value."""
    comment: 'Comment'
    label: str
    type: str
    key_type: str
    val_type: str
    name: str
    number: int
    options: typing.Dict[str, 'FieldOption']
    user_data: typing.Dict[str, typing.Any]


class Enum(typing.NamedTuple):
    """An ``enum`` definition."""
    comment: 'Comment'
    name: str
    fields: typing.Dict[str, 'Field']
    user_data: typing.Dict[str, typing.Any]


class Option(typing.NamedTuple):
    """An ``option`` statement."""
    comment: 'Comment'
    name: str
    content: str


class Message(typing.NamedTuple):
    """A ``message`` definition with everything nested inside it."""
    comment: 'Comment'
    name: str
    fields: typing.List['Field']
    oneofs: typing.List['Oneof']
    messages: typing.Dict[str, 'Message']
    enums: typing.Dict[str, 'Enum']
    options: typing.Dict[str, 'Option']
    user_data: typing.Dict[str, typing.Any]


class Service(typing.NamedTuple):
    """A ``service`` definition and its rpc functions."""
    name: str
    functions: typing.Dict[str, 'RpcFunc']


class RpcFunc(typing.NamedTuple):
    """One ``rpc`` entry of a service."""
    name: str
    in_type: str
    out_type: str
    uri: str


class ProtoFile(typing.NamedTuple):
    """Everything collected from one parsed proto file."""
    messages: typing.Dict[str, 'Message']
    enums: typing.Dict[str, 'Enum']
    services: typing.Dict[str, 'Service']
    imports: typing.List[str]
    options: typing.Dict[str, str]
    package: str
    user_data: typing.Dict[str, typing.Any]
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def merge_comments(comments):
    """Fold several Comment tuples into one.

    Contents are concatenated in order, tags are merged with later comments
    overriding earlier ones, and the first non-empty ``ue_specifiers`` wins.
    """
    pieces = []
    merged_tags = {}
    first_specifier = None

    for entry in comments:
        pieces.append(entry.content)
        merged_tags.update(entry.tags)
        if entry.ue_specifiers and not first_specifier:
            first_specifier = entry.ue_specifiers

    return Comment("".join(pieces), merged_tags, first_specifier)
|
|
155
|
+
|
|
156
|
+
def extrat_comments(tokens):
    """Collect every comment found among ``tokens`` and merge them.

    Comment tuples are taken as they are, a Tail contributes its trailing
    comment when it has one, and raw COMMENT tokens are wrapped in a Comment
    without tags.
    """
    collected = []
    for item in tokens:
        if isinstance(item, Comment):
            collected.append(item)
            continue
        if isinstance(item, Tail):
            if item.comment:
                collected.append(item.comment)
            continue
        if isinstance(item, Token) and item.type == "COMMENT":
            collected.append(Comment(item.value, {}, None))

    return merge_comments(collected)
|
|
169
|
+
|
|
170
|
+
class ProtoTransformer(Transformer):
    '''Converts syntax tree token into more easily usable namedtuple objects'''

    def message(self, tokens):
        '''Returns a Message namedtuple.

        ``tokens`` is either ``[name, body]`` or ``[comment, name, body]``,
        where ``body`` is the tuple returned by ``messagebody``.
        '''
        comment = Comment("", {}, None)
        if len(tokens) < 3:
            name_token, body = tokens
        else:
            comment, name_token, body = tokens
        return Message(comment, name_token.value, *body, {})

    def oneof(self, tokens):
        '''Returns a Oneof namedtuple'''
        # Comment has three fields; the two-argument call raised TypeError.
        comment = Comment("", {}, None)
        fields = []
        name = None
        for token in tokens:
            if isinstance(token, Comment):
                comment = token
            elif isinstance(token, Field):
                fields.append(token)
            elif isinstance(token, Token) and token.type == 'ONEOFNAME':
                name = token.value
        return Oneof(comment, name, fields)

    def oneoffield(self, tokens):
        '''Returns a Field namedtuple for a member of a ``oneof`` block.

        The field gets an empty label; its options are collected the same way
        as for a regular field.
        '''
        comment = Comment("", {}, None)
        field_type = Token("TYPE", "")
        fieldname = Token("FIELDNAME", "")
        fieldnumber = Token("FIELDNUMBER", "")
        options = {}
        for token in tokens:
            if isinstance(token, Comment):
                comment = token
            elif isinstance(token, Tree) and token.data == 'fieldoptions':
                for fieldoption in token.children:
                    if isinstance(fieldoption, FieldOption):
                        options[fieldoption.name.value] = fieldoption
            elif isinstance(token, Token):
                if token.type == "TYPE":
                    field_type = token
                elif token.type == "FIELDNAME":
                    fieldname = token
                elif token.type == "FIELDNUMBER":
                    fieldnumber = token
                elif token.type == "COMMENT":
                    comment = Comment(token.value, {}, None)
        # Field has nine members; all of them are supplied here.
        return Field(comment, '', field_type.value, field_type.value, field_type.value,
                     fieldname.value, int(fieldnumber.value), options, {})

    def fieldoption(self, tokens):
        '''Returns a FieldOption namedtuple.

        The option name has surrounding parentheses stripped. It is returned
        as a Token whose string value and ``.value`` are both the name, so it
        can be used directly as a dictionary key.
        '''
        name = Token("TYPE", "")
        content = Token("", "")
        for token in tokens:
            if isinstance(token, Token):
                if token.type == "OPTIONNAME":
                    name = Token("TYPE", token.value.strip("()"))
                if token.type == "CONSTANT":
                    content = token

        return FieldOption(name, content)

    def enumvalueoption(self, tokens):
        '''Returns a FieldOption namedtuple for an enum value option.'''
        return self.fieldoption(tokens)

    def option(self, tokens):
        '''Returns an Option namedtuple.

        A leading Comment among ``tokens`` replaces the merged comment
        computed by ``extrat_comments``.
        '''
        name = Token("TYPE", "")
        content = Token("", "")
        comment = extrat_comments(tokens)
        for token in tokens:
            if isinstance(token, Comment):
                comment = token
            elif isinstance(token, Token):
                if token.type == "OPTIONNAME":
                    # A fresh Token keeps the name as the string value too;
                    # otherwise every option compares equal to "" when used
                    # as a key in messagebody.
                    name = Token("TYPE", token.value.strip("()"))
                if token.type == "CONSTANT":
                    content = token

        return Option(comment, name, content)

    def messagebody(self, items):
        '''Returns a tuple of message body namedtuples'''
        messages = {}
        enums = {}
        fields = []
        options = {}
        oneofs = []
        for item in items:
            if isinstance(item, Message):
                messages[item.name] = item
            elif isinstance(item, Enum):
                enums[item.name] = item
            elif isinstance(item, Field):
                fields.append(item)
            elif isinstance(item, Option):
                options[item.name] = item
            elif isinstance(item, Oneof):
                oneofs.append(item)

        return fields, oneofs, messages, enums, options

    def tail(self, tokens):
        '''Returns a Tail namedtuple holding the trailing comment, or None.'''
        comment = None
        for token in tokens:
            if isinstance(token, Comment):
                comment = token

        return Tail(comment)

    def field(self, tokens):
        '''Returns a Field namedtuple'''
        field_type = Token("TYPE", "")
        label = Token("LABEL", "")
        fieldname = Token("FIELDNAME", "")
        fieldnumber = Token("FIELDNUMBER", "")
        options = {}
        for token in tokens:
            if isinstance(token, Tree) and token.data == 'fieldoptions':
                for fieldoption in token.children:
                    if isinstance(fieldoption, FieldOption):
                        options[fieldoption.name.value] = fieldoption
            elif isinstance(token, Token):
                if token.type == "TYPE":
                    field_type = token
                elif token.type == "LABEL":
                    label = token
                elif token.type == "FIELDNAME":
                    fieldname = token
                elif token.type == "FIELDNUMBER":
                    fieldnumber = token

        return Field(extrat_comments(tokens), label.value, field_type.value, field_type.value,
                     field_type.value, fieldname.value, int(fieldnumber.value), options, {})

    def mapfield(self, tokens):
        '''Returns a Field namedtuple of type ``map``'''
        val_type = Token("TYPE", "")
        key_type = Token("KEYTYPE", "")
        fieldname = Token("MAPNAME", "")
        fieldnumber = Token("FIELDNUMBER", "")
        options = {}
        for token in tokens:
            if isinstance(token, Tree) and token.data == 'fieldoptions':
                for fieldoption in token.children:
                    if isinstance(fieldoption, FieldOption):
                        # Key by the option itself; the enclosing Tree has no name.
                        options[fieldoption.name.value] = fieldoption
            elif isinstance(token, Token):
                if token.type == "TYPE":
                    val_type = token
                elif token.type == "KEYTYPE":
                    key_type = token
                elif token.type == "MAPNAME":
                    fieldname = token
                elif token.type == "FIELDNUMBER":
                    fieldnumber = token
        return Field(extrat_comments(tokens), '', 'map', key_type.value, val_type.value,
                     fieldname.value, int(fieldnumber.value), options, {})

    def comments(self, tokens):
        '''Returns a Comment namedtuple.

        ``//`` comments starting with UPROPERTY, UCLASS or UENUM are stored as
        the ue specifier and left out of the content. ``@key`` and
        ``@key=value`` fragments are collected into the tags dictionary.
        '''
        comment = ''
        tags = {}
        ue_specifier = None
        for token in tokens:
            if token is None:
                continue

            if isinstance(token, Token):
                token_str = token.value
            else:
                token_str = token

            if "//" in token_str:
                comment_content = token_str.replace("//", "").strip(" /\n")
                if comment_content.startswith(("UPROPERTY", "UCLASS", "UENUM")):
                    ue_specifier = comment_content
                    continue

            comment += token_str + "\n"
            if '@' not in token_str:
                continue
            kvs = token_str.strip(" /\n").split('@')
            for kv in kvs:
                kv = kv.strip(" /\n")
                if not kv:
                    continue
                tmp = kv.split('=')
                key = tmp[0].strip(" /\n").lower()
                # A key containing a space is ordinary text, not a tag.
                if " " in key:
                    continue
                if len(tmp) > 1:
                    tags[key] = tmp[1].lower()
                else:
                    tags[key] = True
        return Comment(comment, tags, ue_specifier)

    def trail_comment(self, tokens):
        '''Returns a Comment namedtuple for a comment that ends a statement.'''
        if len(tokens) > 0:
            return Comment(tokens[0].value, {}, None)
        else:
            return Comment("", {}, None)

    def enum(self, tokens):
        '''Returns an Enum namedtuple'''
        comment = Comment("", {}, None)
        if len(tokens) < 3:
            name, fields = tokens
        else:
            comment, name, fields = tokens
        return Enum(comment, name.value, fields, {})

    def enumbody(self, tokens):
        '''Returns a sequence of enum identifiers.

        Each ``enumfield`` subtree becomes a Field of type ``enum``; the
        number is kept as the text matched by INTLIT. Other body items
        (options, reserved statements, empty statements) are skipped.
        '''
        enumitems = []
        for tree in tokens:
            # Body items are not all Trees: options are Option tuples and
            # empty statements are plain Tokens.
            if not isinstance(tree, Tree) or tree.data != 'enumfield':
                continue
            name = Token("IDENT", "")
            value = Token("INTLIT", "")
            options = {}
            for token in tree.children:
                if isinstance(token, FieldOption):
                    # enumvalueoption nodes arrive already transformed.
                    options[token.name.value] = token
                elif isinstance(token, Token):
                    if token.type == "IDENT":
                        name = token
                    elif token.type == "INTLIT":
                        value = token
            enumitems.append(Field(extrat_comments(tree.children), '', 'enum', 'enum', 'enum',
                                   name.value, value.value, options, {}))
        return enumitems

    def service(self, tokens):
        '''Returns a Service namedtuple'''
        functions = []
        name = ''
        for token in tokens:
            if isinstance(token, RpcFunc):
                functions.append(token)
            elif isinstance(token, Token) and token.type == "SERVICENAME":
                # Only the name token is used; options, empty statements and
                # missing-comment placeholders are ignored.
                name = token.value
        return Service(name, functions)

    def rpc(self, tokens):
        '''Returns a RpcFunc namedtuple.

        The first MESSAGETYPE token is the input type, the following one the
        output type. ``uri`` is taken from the first child of an untransformed
        option subtree and stays empty otherwise.
        '''
        uri = ''
        name = None
        in_type = None
        out_type = None
        for token in tokens:
            if isinstance(token, Token):
                if token.type == "RPCNAME":
                    name = token
                elif token.type == "MESSAGETYPE":
                    if in_type is None:
                        in_type = token
                    else:
                        out_type = token
            elif isinstance(token, Tree) and token.children:
                # Option tuples and None placeholders have no children and
                # are skipped.
                first_child = token.children[0]
                if isinstance(first_child, Token):
                    uri = first_child.value
        return RpcFunc(name.value, in_type.value, out_type.value, uri.strip('"'))
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def _recursive_to_dict(obj):
|
|
429
|
+
_dict = {}
|
|
430
|
+
|
|
431
|
+
if isinstance(obj, tuple):
|
|
432
|
+
node = obj._asdict()
|
|
433
|
+
for item in node:
|
|
434
|
+
if isinstance(node[item], list): # Process as a list
|
|
435
|
+
_dict[item] = [_recursive_to_dict(x) for x in (node[item])]
|
|
436
|
+
elif isinstance(node[item], tuple): # Process as a NamedTuple
|
|
437
|
+
_dict[item] = _recursive_to_dict(node[item])
|
|
438
|
+
elif isinstance(node[item], dict):
|
|
439
|
+
for k in node[item]:
|
|
440
|
+
if isinstance(node[item][k], tuple):
|
|
441
|
+
node[item][k] = _recursive_to_dict(node[item][k])
|
|
442
|
+
_dict[item] = node[item]
|
|
443
|
+
else: # Process as a regular element
|
|
444
|
+
_dict[item] = (node[item])
|
|
445
|
+
return _dict
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def parse_from_file(file: str, encoding: str="utf-8"):
    """Read a proto file and parse its content.

    :param file: Path of the proto file.
    :param encoding: Text encoding used to read the file.
    :return: Result of ``parse``, or None when the file is empty.
    """
    with open(file, 'r', encoding=encoding) as handle:
        text = handle.read()
    if not text:
        return None
    return parse(text)
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def parse(data: str):
    """Parse proto2 source text into a ProtoFile namedtuple.

    :param data: Content of a proto file.
    :return: ProtoFile with the top-level messages, enums and services, the
        imported file names, the file-level options and the package name.
    """
    # The parser is built without debug=True, a grammar-development switch.
    # NOTE(review): with the Earley parser, debug mode appears to try dumping
    # a parse-forest image - confirm against the Lark version in use.
    parser = Lark(BNF, start='proto', parser='earley')
    trans_tree = ProtoTransformer().transform(parser.parse(data))

    imports = []
    for import_node in trans_tree.find_data('import'):
        for child in import_node.children:
            if isinstance(child, Token):
                imports.append(child.value.strip('"'))

    # ProtoTransformer.option turns every option node into an Option tuple,
    # so file-level options are direct children of the root, not subtrees.
    options = {}
    for child in trans_tree.children:
        if isinstance(child, Option):
            options[child.name.value] = str(child.content).strip('"')

    package = ''
    for package_node in trans_tree.find_data('package'):
        package = package_node.children[0]

    enums = {}
    messages = {}
    services = {}
    for top_level in trans_tree.find_data('topleveldef'):
        for child in top_level.children:
            if isinstance(child, Message):
                messages[child.name] = child
            elif isinstance(child, Enum):
                enums[child.name] = child
            elif isinstance(child, Service):
                services[child.name] = child
    return ProtoFile(messages, enums, services, imports, options, package, {})
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def serialize2json(data):
    """Parse proto source text and return the result as a JSON string."""
    proto_file = parse(data)
    as_plain_dict = _recursive_to_dict(proto_file)
    return json.dumps(as_plain_dict)
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def serialize2json_from_file(file: str, encoding: str="utf-8"):
    """Read a proto file and return its parsed form as indented JSON.

    :param file: Path of the proto file.
    :param encoding: Text encoding used to read the file.
    :return: JSON string indented by four spaces, or None when the file is
        empty.
    """
    with open(file, 'r', encoding=encoding) as handle:
        text = handle.read()
    if not text:
        return None
    proto_file = parse(text)
    return json.dumps(_recursive_to_dict(proto_file), indent=4)
|