python-datamodel 0.10.1__cp313-cp313-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamodel/__init__.py +13 -0
- datamodel/abstract.py +383 -0
- datamodel/adaptive/__init__.py +0 -0
- datamodel/adaptive/models.py +598 -0
- datamodel/aliases/__init__.py +26 -0
- datamodel/base.py +180 -0
- datamodel/converters.c +43471 -0
- datamodel/converters.cp313-win32.pyd +0 -0
- datamodel/converters.html +17387 -0
- datamodel/converters.pyx +1489 -0
- datamodel/exceptions.c +13455 -0
- datamodel/exceptions.cp313-win32.pyd +0 -0
- datamodel/exceptions.html +1261 -0
- datamodel/exceptions.pxd +13 -0
- datamodel/exceptions.pyx +50 -0
- datamodel/fields.cp313-win32.pyd +0 -0
- datamodel/fields.cpp +17401 -0
- datamodel/fields.html +3912 -0
- datamodel/fields.pyx +309 -0
- datamodel/functions.cp313-win32.pyd +0 -0
- datamodel/functions.cpp +9068 -0
- datamodel/functions.html +1766 -0
- datamodel/functions.pxd +9 -0
- datamodel/functions.pyx +82 -0
- datamodel/jsonld/__init__.py +45 -0
- datamodel/jsonld/models.py +500 -0
- datamodel/libs/__init__.py +1 -0
- datamodel/libs/mapping.c +15067 -0
- datamodel/libs/mapping.cp313-win32.pyd +0 -0
- datamodel/libs/mapping.html +2618 -0
- datamodel/libs/mapping.pxd +11 -0
- datamodel/libs/mapping.pyx +135 -0
- datamodel/libs/mutables.py +127 -0
- datamodel/models.py +814 -0
- datamodel/parsers/__init__.py +0 -0
- datamodel/parsers/encoders.py +15 -0
- datamodel/parsers/json.cp313-win32.pyd +0 -0
- datamodel/parsers/json.cpp +17004 -0
- datamodel/parsers/json.html +3365 -0
- datamodel/parsers/json.pyx +250 -0
- datamodel/profiler.py +21 -0
- datamodel/py.typed +0 -0
- datamodel/rs_core/Cargo.toml +17 -0
- datamodel/rs_core/src/lib.rs +294 -0
- datamodel/rs_parsers/Cargo.toml +22 -0
- datamodel/rs_parsers/src/lib.rs +571 -0
- datamodel/rs_parsers.cp313-win32.pyd +0 -0
- datamodel/rs_validators/Cargo.toml +17 -0
- datamodel/rs_validators/src/lib.rs +0 -0
- datamodel/typedefs/__init__.py +9 -0
- datamodel/typedefs/singleton.c +9169 -0
- datamodel/typedefs/singleton.cp313-win32.pyd +0 -0
- datamodel/typedefs/singleton.html +629 -0
- datamodel/typedefs/singleton.pxd +9 -0
- datamodel/typedefs/singleton.pyx +24 -0
- datamodel/typedefs/types.c +11716 -0
- datamodel/typedefs/types.cp313-win32.pyd +0 -0
- datamodel/typedefs/types.html +732 -0
- datamodel/typedefs/types.pxd +11 -0
- datamodel/typedefs/types.pyx +39 -0
- datamodel/types.c +7165 -0
- datamodel/types.cp313-win32.pyd +0 -0
- datamodel/types.html +716 -0
- datamodel/types.pyx +100 -0
- datamodel/validation.cp313-win32.pyd +0 -0
- datamodel/validation.cpp +17085 -0
- datamodel/validation.html +4769 -0
- datamodel/validation.pyx +315 -0
- datamodel/version.py +13 -0
- examples/nn/examples.py +311 -0
- examples/nn/stores.py +151 -0
- examples/tests/sp_types.py +294 -0
- examples/tests/speed_dates.py +26 -0
- python_datamodel-0.10.1.dist-info/LICENSE +29 -0
- python_datamodel-0.10.1.dist-info/METADATA +320 -0
- python_datamodel-0.10.1.dist-info/RECORD +78 -0
- python_datamodel-0.10.1.dist-info/WHEEL +5 -0
- python_datamodel-0.10.1.dist-info/top_level.txt +7 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# cython: language_level=3, embedsignature=True, boundscheck=False, wraparound=True, initializedcheck=False
|
|
2
|
+
# Copyright (C) 2018-present Jesus Lara
|
|
3
|
+
#
|
|
4
|
+
"""
|
|
5
|
+
Module for JSON encoding and decoding using orjson with custom type handling.
|
|
6
|
+
|
|
7
|
+
This module provides the JSONContent class, which extends orjson's functionality to
|
|
8
|
+
support additional types (e.g., Decimal, datetime, custom Enum types, etc.), and a BaseEncoder
|
|
9
|
+
class as a drop-in replacement for json.dumps.
|
|
10
|
+
"""
|
|
11
|
+
import uuid
|
|
12
|
+
from pathlib import PosixPath, PurePath, Path
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from asyncpg.pgproto import pgproto
|
|
15
|
+
from psycopg2 import Binary
|
|
16
|
+
from cpython cimport PyErr_Clear
|
|
17
|
+
from cpython.object cimport (
|
|
18
|
+
PyObject_IsInstance,
|
|
19
|
+
PyObject_IsSubclass,
|
|
20
|
+
PyObject_TypeCheck,
|
|
21
|
+
PyObject_HasAttr,
|
|
22
|
+
PyObject_GetAttr,
|
|
23
|
+
PyCallable_Check
|
|
24
|
+
)
|
|
25
|
+
from dataclasses import _MISSING_TYPE, MISSING, InitVar
|
|
26
|
+
from typing import Any, Union
|
|
27
|
+
from decimal import Decimal
|
|
28
|
+
from enum import Enum, EnumType
|
|
29
|
+
import orjson
|
|
30
|
+
from ..exceptions cimport ParserError
|
|
31
|
+
from ..fields import Field
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
cdef inline bint is_callable(object obj):
    """Tell whether ``obj`` is callable, as reported by ``PyCallable_Check``."""
    # Any non-zero answer from the C API is treated as "callable".
    return PyCallable_Check(obj) != 0
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
cdef inline bint has_attribute(object obj, str name):
    """Return True when ``obj`` exposes an attribute called ``name``.

    ``PyObject_HasAttr`` only ever answers 0 or 1 (it swallows lookup errors
    itself), so there is no error state to detect or clear here.
    """
    return PyObject_HasAttr(obj, name) != 0
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
cdef inline object get_attribute(object obj, str name):
    """Fetch ``obj.<name>`` through the C API.

    Returns:
        The attribute value, which may legitimately be ``None``.

    Raises:
        AttributeError: propagated from the lookup when the attribute is
            missing (the caller in ``JSONContent.default`` catches it).
    """
    # A failed lookup has already been turned into a Python exception by the
    # time control returns here, so a ``None`` result is a real attribute
    # value and there is no pending error to clear.
    return PyObject_GetAttr(obj, name)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
cdef inline bint is_subclassof(object obj, object cls):
    """Return True when ``obj`` is a subclass of ``cls``; False otherwise.

    A failing subclass check (for example when ``obj`` is not a class) is
    reported as False instead of raising.
    """
    try:
        return PyObject_IsSubclass(obj, cls) != 0
    except Exception:
        # The failed check surfaces as a Python exception rather than a
        # negative return value, so it has to be caught to honour the
        # "False on failure" contract.
        return False
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Baseline orjson flags used by every JSONContent.encode() call.
# OPT_NON_STR_KEYS is intentionally left out of the baseline; encode() adds it
# on request through its ``non_str_keys`` argument.
ORJSON_DEFAULT_OPTIONS = orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_UTC_Z
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
cdef class JSONContent:
    """
    A basic JSON encoder/decoder using orjson.

    This class provides methods to serialize Python objects to JSON strings and deserialize
    JSON strings back into Python objects, with custom handling for additional data types.
    """
    def __call__(self, object obj, **kwargs):
        """Shortcut for :meth:`encode`, so an instance can be used as a function."""
        return self.encode(obj, **kwargs)

    def default(self, object obj):
        """
        Fallback passed to ``orjson.dumps`` for objects it cannot serialize itself.

        Parameters:
            obj: The object orjson could not serialize natively.

        Returns:
            A JSON-compatible replacement for `obj`.

        Raises:
            TypeError: If no conversion is known for `obj`.
        """
        # Enum *classes* go first: a class mixing in ``str`` also exposes
        # ``lower``/``upper`` and would otherwise be mistaken for a Range below.
        if PyObject_IsInstance(obj, type) and is_subclassof(obj, Enum):
            return [{'value': e.value, 'name': e.name} for e in obj]
        elif isinstance(obj, Decimal):
            return float(obj)
        elif isinstance(obj, datetime):
            return str(obj)
        elif has_attribute(obj, "isoformat"):
            return obj.isoformat()
        elif isinstance(obj, (PosixPath, PurePath, Path)):
            return str(obj)
        elif isinstance(obj, (pgproto.UUID, uuid.UUID)):
            # Always hand back text: returning the UUID object itself would
            # only make the serializer ask this hook about it again.
            return str(obj)
        elif has_attribute(obj, "hex"):
            # ``hex`` may be a method (bytes) or a plain attribute.
            hex_attr = obj.hex
            if is_callable(hex_attr):
                return hex_attr()
            return hex_attr
        elif has_attribute(obj, 'lower') and has_attribute(obj, 'upper'):  # asyncPg Range:
            up = obj.upper
            if isinstance(up, int):
                up = up - 1  # discrete representation
            return [obj.lower, up]
        elif has_attribute(obj, 'tolist'):  # numpy array
            return obj.tolist()
        elif isinstance(obj, _MISSING_TYPE) or obj is MISSING:
            return None
        elif isinstance(obj, Enum):
            if has_attribute(obj, 'value'):
                return obj.value
            else:
                return obj.name
        elif isinstance(obj, Binary):  # Handle bytea column from PostgreSQL
            return str(obj)  # Convert Binary object to string
        elif isinstance(obj, Field):
            if has_attribute(obj, 'to_dict'):
                return obj.to_dict()
            return str(obj)
        elif has_attribute(obj, 'to_dict'):
            return obj.to_dict()
        elif has_attribute(obj, 'to_json'):
            # Embed the object's own JSON representation verbatim. The method
            # that was detected is the one that must be called (not ``obj()``).
            payload = obj.to_json()
            if isinstance(payload, str):
                payload = payload.encode('utf-8')
            return orjson.Fragment(payload)
        elif isinstance(obj, InitVar) or type(obj).__name__ == 'InitVar':
            # Handle InitVar explicitly
            return None
        raise TypeError(
            f'{obj!r} of Type {type(obj)} is not JSON serializable'
        )

    def encode(self, object obj, bint naive_utc = True, bint non_str_keys = False, **kwargs) -> str:
        """
        Serialize `obj` to a JSON string with orjson.

        Types orjson does not handle natively are converted by :meth:`default`.

        Parameters:
            obj: The object to serialize.
            naive_utc (bool, optional): If True, add ``orjson.OPT_NAIVE_UTC`` to the options.
            non_str_keys (bool, optional): If True, add ``orjson.OPT_NON_STR_KEYS`` to the options.
            **kwargs: Extra keyword arguments for ``orjson.dumps``; they override the
                ``default`` and ``option`` values built here.

        Returns:
            The JSON document as a ``str``.

        Raises:
            ParserError: If orjson cannot encode `obj`.
        """
        cdef int opt = ORJSON_DEFAULT_OPTIONS
        if naive_utc:
            opt |= orjson.OPT_NAIVE_UTC
        if non_str_keys:
            opt |= orjson.OPT_NON_STR_KEYS
        options = {
            "default": self.default,
            "option": opt
        }
        options.update(kwargs)
        try:
            return orjson.dumps(
                obj,
                **options
            ).decode('utf-8')
        except orjson.JSONEncodeError as ex:
            # Chain the original error so the failing value stays visible.
            raise ParserError(
                f"Invalid JSON data: {ex}"
            ) from ex

    dumps = encode

    @classmethod
    def dump(cls, object obj, **kwargs):
        """
        Class method to encode an object using a new JSONContent instance.
        """
        return cls().encode(obj, **kwargs)

    def decode(self, object obj):
        """
        Decode a JSON string into a Python object.

        Parameters:
            obj: A JSON string or bytes.

        Returns:
            The corresponding Python object.

        Raises:
            ParserError: If the JSON data is invalid.
        """
        try:
            return orjson.loads(
                obj
            )
        except orjson.JSONDecodeError as ex:
            raise ParserError(
                f"Invalid JSON data: {ex}"
            ) from ex

    loads = decode

    @classmethod
    def load(cls, object obj, **kwargs):
        """
        Class method to decode JSON data using a new JSONContent instance.

        Any keyword arguments are forwarded to :meth:`decode` unchanged.
        """
        return cls().decode(obj, **kwargs)
|
|
218
|
+
|
|
219
|
+
cpdef str json_encoder(object obj, bint naive_utc = True, bint non_str_keys = False):
    """
    Serialize `obj` to a JSON string with a throwaway JSONContent instance.
    """
    encoder = JSONContent()
    return encoder.dumps(obj, naive_utc=naive_utc, non_str_keys=non_str_keys)
|
|
224
|
+
|
|
225
|
+
cpdef object json_decoder(object obj):
    """
    Parse JSON data with a throwaway JSONContent instance.
    """
    decoder = JSONContent()
    return decoder.loads(obj)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
cpdef object json_fragment(bytes obj):
    """
    Wrap an already-serialized JSON document in an ``orjson.Fragment``.
    """
    fragment = orjson.Fragment(obj)
    return fragment
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
cdef class BaseEncoder:
    """
    Encoder replacement for json.dumps but using orjson,

    This is a drop-in replacement for json.dumps using orjson.
    """
    # Extension types have no instance ``__dict__``; the attribute assigned in
    # ``__init__`` must be declared here or the assignment fails at runtime.
    cdef public object encode

    def __init__(self, *args, **kwargs):
        # Filter/adapt JSON arguments to ORJSON ones.
        # Nothing is forwarded: JSONContent is built without arguments, so the
        # json.dumps-style positional/keyword arguments are accepted and ignored.
        rjargs = ()
        rjkwargs = {}
        encoder = JSONContent(*rjargs, **rjkwargs)
        self.encode = encoder.__call__
|
datamodel/profiler.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import cProfile
|
|
2
|
+
import functools
|
|
3
|
+
import pstats
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def profile(func):
    """Decorator that profiles every call to ``func`` with cProfile.

    After each call (including one that raises) the collected statistics are
    written to ``profile.out`` in the current working directory, replacing any
    previous report. The wrapped function's return value is passed through.
    """

    @functools.wraps(func)
    def inner(*args, **kwargs):
        recorder = cProfile.Profile()
        recorder.enable()
        try:
            outcome = func(*args, **kwargs)
        finally:
            # Stop recording first so the report writing is not profiled.
            recorder.disable()
            with open('profile.out', 'w') as report:
                pstats.Stats(recorder, stream=report).print_stats()
        return outcome

    return inner
|
datamodel/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "rs_core"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
authors = ["Jesus Lara <jesuslarag@gmail.com>"]
|
|
6
|
+
description = "Core Implementation for parsing and validating Datamodel Fields."
|
|
7
|
+
license = "MIT"
|
|
8
|
+
repository = "https://github.com/phenobarbital/python-datamodel"
|
|
9
|
+
|
|
10
|
+
[lib]
|
|
11
|
+
name = "rs_core"
|
|
12
|
+
crate-type = ["cdylib"]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
[dependencies]
|
|
16
|
+
pyo3 = { version = "0.23.3", features = ["generate-import-lib", "extension-module"] }
|
|
17
|
+
rayon = "1.5.3"
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
use pyo3::prelude::*;
|
|
2
|
+
// use pyo3::exceptions::PyValueError;
|
|
3
|
+
use pyo3::exceptions::PyTypeError;
|
|
4
|
+
use pyo3::wrap_pyfunction;
|
|
5
|
+
use pyo3::types::PyType;
|
|
6
|
+
use pyo3::types::{PyDate, PyDateTime, PyAny, PyDict};
|
|
7
|
+
// use pyo3::PyTypeInfo;
|
|
8
|
+
// use chrono::{Datelike, Timelike, NaiveDate, NaiveTime, NaiveDateTime, DateTime, Utc};
|
|
9
|
+
use rayon::prelude::*;
|
|
10
|
+
// use std::collections::HashMap;
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
#[pyfunction]
|
|
14
|
+
fn validate_datamodel(py: Python<'_>, dataclass_instance: PyObject) -> PyResult<Vec<(String, bool)>> {
|
|
15
|
+
// Get the class of the instance
|
|
16
|
+
let dataclass: &PyType = dataclass_instance.as_ref(py).get_type();
|
|
17
|
+
|
|
18
|
+
// Get the __dataclass_fields__ attribute from the class
|
|
19
|
+
let fields_dict: &PyDict = dataclass.getattr("__dataclass_fields__")?.downcast::<PyDict>()?;
|
|
20
|
+
|
|
21
|
+
// Validate each field in the main thread
|
|
22
|
+
let results: Vec<(String, bool)> = fields_dict
|
|
23
|
+
.items()
|
|
24
|
+
.iter()
|
|
25
|
+
.map(|item| {
|
|
26
|
+
let (key, field) = item.extract::<(String, &PyAny)>().unwrap();
|
|
27
|
+
|
|
28
|
+
// Extract information from the dataclass.Field object
|
|
29
|
+
let field_type = field.getattr("type").unwrap().to_object(py);
|
|
30
|
+
let value = dataclass_instance.getattr(py, key.as_str()).unwrap();
|
|
31
|
+
|
|
32
|
+
let is_valid = match validate_field(py, &field_type, &value) {
|
|
33
|
+
Ok(result) => result,
|
|
34
|
+
Err(e) => {
|
|
35
|
+
eprintln!("Validation error for field {}: {}", key, e);
|
|
36
|
+
false
|
|
37
|
+
}
|
|
38
|
+
};
|
|
39
|
+
(key.to_string(), is_valid)
|
|
40
|
+
})
|
|
41
|
+
.collect();
|
|
42
|
+
|
|
43
|
+
Ok(results)
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
fn validate_field(py: Python<'_>, field_type: &PyObject, value: &PyObject) -> PyResult<bool> {
|
|
47
|
+
// Check if it's a primitive type
|
|
48
|
+
if let Ok(type_) = field_type.extract::<&PyType>(py) {
|
|
49
|
+
let type_name = type_.name()?;
|
|
50
|
+
match type_name {
|
|
51
|
+
"str" => {
|
|
52
|
+
return Ok(value.extract::<&str>(py).is_ok());
|
|
53
|
+
}
|
|
54
|
+
"int" => {
|
|
55
|
+
return Ok(value.extract::<i64>(py).is_ok());
|
|
56
|
+
}
|
|
57
|
+
"float" => {
|
|
58
|
+
return Ok(value.extract::<f64>(py).is_ok());
|
|
59
|
+
}
|
|
60
|
+
"bool" => {
|
|
61
|
+
return Ok(value.extract::<bool>(py).is_ok());
|
|
62
|
+
}
|
|
63
|
+
"datetime" => {
|
|
64
|
+
return Ok(value.extract::<&PyDateTime>(py).is_ok());
|
|
65
|
+
}
|
|
66
|
+
"date" => {
|
|
67
|
+
return Ok(value.extract::<&PyDate>(py).is_ok());
|
|
68
|
+
}
|
|
69
|
+
_ => {
|
|
70
|
+
// Not a primitive type, you can either skip validation or return an error
|
|
71
|
+
// eprintln!("Skipping validation for non-primitive type: {}", type_name);
|
|
72
|
+
// Ok(true) // Option 1: Skip validation
|
|
73
|
+
return Err(PyTypeError::new_err(format!(
|
|
74
|
+
"Validation for type {} is not implemented yet.",
|
|
75
|
+
type_name
|
|
76
|
+
))); // Option 2: Return an error
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
} else {
|
|
80
|
+
// Handle the case where field_type is not a PyType (e.g., it's a generic type)
|
|
81
|
+
eprintln!("Field type is not a PyType: {:?}", field_type);
|
|
82
|
+
return Err(PyTypeError::new_err(
|
|
83
|
+
"Field type is not a PyType, cannot validate.",
|
|
84
|
+
));
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/// The field types this module knows how to parse and validate.
#[derive(Debug)]
enum FieldType {
    Str,
    Int,
    Float,
    Bool,
    DateTime,
    Date,
    Time,
    // Extend with more types as needed
}

impl FieldType {
    /// Convert type name string to FieldType enum.
    ///
    /// Temporal types are accepted both by bare class name (`"datetime"`,
    /// the spelling `validate_field` matches on) and by module-qualified name
    /// (`"datetime.datetime"`). Unknown names give `None`.
    fn from_str(type_name: &str) -> Option<Self> {
        match type_name {
            "str" => Some(FieldType::Str),
            "int" => Some(FieldType::Int),
            "float" => Some(FieldType::Float),
            "bool" => Some(FieldType::Bool),
            "datetime" | "datetime.datetime" => Some(FieldType::DateTime),
            "date" | "datetime.date" => Some(FieldType::Date),
            "time" | "datetime.time" => Some(FieldType::Time),
            _ => None,
        }
    }

    /// Check that the textual form carried by `value` can be parsed as this type.
    ///
    /// Scalar types need no parsing and always succeed. Temporal types read
    /// the text from their own variant (`FieldValue::DateTime` / `Date` /
    /// `Time`) or from a plain `FieldValue::Str`, and require:
    /// * `DateTime`: `YYYY-MM-DD`, a `T`/`t`/space separator, `HH:MM:SS`,
    ///   optional fractional seconds, then `Z` or a `+HH:MM` / `-HH:MM` offset;
    /// * `Date`: `YYYY-MM-DD` naming a real calendar day;
    /// * `Time`: `HH:MM:SS` with hours 00-23 and minutes/seconds 00-59.
    fn parse(&self, value: &FieldValue) -> bool {
        match (self, value) {
            (FieldType::Str | FieldType::Int | FieldType::Float | FieldType::Bool, _) => true,
            (FieldType::DateTime, FieldValue::DateTime(s) | FieldValue::Str(s)) => {
                is_rfc3339_datetime(s)
            }
            (FieldType::Date, FieldValue::Date(s) | FieldValue::Str(s)) => is_iso_date(s),
            (FieldType::Time, FieldValue::Time(s) | FieldValue::Str(s)) => is_iso_time(s),
            // A temporal type paired with a non-textual value cannot be parsed.
            (FieldType::DateTime | FieldType::Date | FieldType::Time, _) => false,
        }
    }

    /// Validate that `value` holds the variant matching this FieldType.
    fn validate(&self, value: &FieldValue) -> bool {
        match self {
            FieldType::Str => matches!(value, FieldValue::Str(_)),
            FieldType::Int => matches!(value, FieldValue::Int(_)),
            FieldType::Float => matches!(value, FieldValue::Float(_)),
            FieldType::Bool => matches!(value, FieldValue::Bool(_)),
            FieldType::DateTime => matches!(value, FieldValue::DateTime(_)),
            FieldType::Date => matches!(value, FieldValue::Date(_)),
            FieldType::Time => matches!(value, FieldValue::Time(_)),
            // Add more validations as needed
        }
    }
}

/// Enum representing the Rust-native value of a field
#[derive(Debug)]
enum FieldValue {
    Str(String),
    Int(i64),
    Float(f64),
    Bool(bool),
    DateTime(String), // Store as String; parse validation done separately
    Date(String),
    Time(String),
    // Extend with more types as needed
}

/// Parse `s` as a number made of exactly `width` ASCII digits.
fn fixed_digits(s: &str, width: usize) -> Option<u32> {
    if s.len() == width && s.bytes().all(|b| b.is_ascii_digit()) {
        s.parse().ok()
    } else {
        None
    }
}

/// True when `s` is `YYYY-MM-DD` and names a real calendar day (leap years honoured).
fn is_iso_date(s: &str) -> bool {
    let mut parts = s.split('-');
    let (Some(y), Some(m), Some(d), None) =
        (parts.next(), parts.next(), parts.next(), parts.next())
    else {
        return false;
    };
    let (Some(year), Some(month), Some(day)) =
        (fixed_digits(y, 4), fixed_digits(m, 2), fixed_digits(d, 2))
    else {
        return false;
    };
    let leap = (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
    let days_in_month = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if leap => 29,
        2 => 28,
        _ => return false,
    };
    (1..=days_in_month).contains(&day)
}

/// True when `s` is `HH:MM:SS` with every component in range.
fn is_iso_time(s: &str) -> bool {
    let mut parts = s.split(':');
    let (Some(h), Some(m), Some(sec), None) =
        (parts.next(), parts.next(), parts.next(), parts.next())
    else {
        return false;
    };
    matches!(fixed_digits(h, 2), Some(0..=23))
        && matches!(fixed_digits(m, 2), Some(0..=59))
        && matches!(fixed_digits(sec, 2), Some(0..=59))
}

/// True when `s` is `Z`/`z` or a signed `HH:MM` offset.
fn is_utc_offset(s: &str) -> bool {
    if matches!(s, "Z" | "z") {
        return true;
    }
    let Some(hhmm) = s.strip_prefix('+').or_else(|| s.strip_prefix('-')) else {
        return false;
    };
    match hhmm.split_once(':') {
        Some((h, m)) => {
            matches!(fixed_digits(h, 2), Some(0..=23)) && matches!(fixed_digits(m, 2), Some(0..=59))
        }
        None => false,
    }
}

/// True when `s` is a date, a separator, a time, optional fractional seconds
/// and a mandatory UTC designator or numeric offset.
fn is_rfc3339_datetime(s: &str) -> bool {
    // `get` keeps the slicing panic-free on short or non-ASCII input.
    let (Some(date), Some(sep), Some(rest)) = (s.get(..10), s.get(10..11), s.get(11..)) else {
        return false;
    };
    if !matches!(sep, "T" | "t" | " ") || !is_iso_date(date) {
        return false;
    }
    let (Some(time), Some(tail)) = (rest.get(..8), rest.get(8..)) else {
        return false;
    };
    if !is_iso_time(time) {
        return false;
    }
    let tail = match tail.strip_prefix('.') {
        Some(frac) => {
            let digits = frac.bytes().take_while(u8::is_ascii_digit).count();
            if digits == 0 {
                return false;
            }
            // The skipped bytes are ASCII digits, so this is a char boundary.
            &frac[digits..]
        }
        None => tail,
    };
    is_utc_offset(tail)
}
|
|
174
|
+
|
|
175
|
+
// A Rust struct representing the minimal info we need from each dataclass Field
|
|
176
|
+
#[derive(Debug)]
|
|
177
|
+
struct RustFieldInfo {
|
|
178
|
+
pub field_name: String,
|
|
179
|
+
pub field_type: FieldType,
|
|
180
|
+
pub type_name: String, // Assuming type is always present for simplicity
|
|
181
|
+
value: FieldValue,
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
/// Collect the minimal field data we need into native Rust structs
|
|
185
|
+
fn get_field_info(py: Python<'_>, dataclass_instance: &PyObject, fields_dict: &PyDict) -> PyResult<Vec<RustFieldInfo>> {
|
|
186
|
+
let mut result = Vec::new();
|
|
187
|
+
|
|
188
|
+
for (key, field_obj) in fields_dict.iter() {
|
|
189
|
+
let field_name = key.extract::<String>()?;
|
|
190
|
+
|
|
191
|
+
// Extract type name
|
|
192
|
+
let type_obj = field_obj.getattr("type")?;
|
|
193
|
+
let type_name = type_obj.extract::<&PyType>()?.name()?.to_string();
|
|
194
|
+
|
|
195
|
+
// Convert type name to FieldType enum
|
|
196
|
+
let field_type = match FieldType::from_str(&type_name) {
|
|
197
|
+
Some(ft) => ft,
|
|
198
|
+
None => continue, // Skip unsupported types or handle as needed
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
// Extract value
|
|
202
|
+
let py_value = dataclass_instance.getattr(py, &field_name[..])?;
|
|
203
|
+
|
|
204
|
+
// Convert PyObject to Rust-native FieldValue
|
|
205
|
+
let value = match field_type {
|
|
206
|
+
FieldType::Str => {
|
|
207
|
+
FieldValue::Str(py_value.extract::<String>(py)?)
|
|
208
|
+
},
|
|
209
|
+
FieldType::Int => {
|
|
210
|
+
FieldValue::Int(py_value.extract::<i64>(py)?)
|
|
211
|
+
},
|
|
212
|
+
FieldType::Float => {
|
|
213
|
+
FieldValue::Float(py_value.extract::<f64>(py)?)
|
|
214
|
+
},
|
|
215
|
+
FieldType::Bool => {
|
|
216
|
+
FieldValue::Bool(py_value.extract::<bool>(py)?)
|
|
217
|
+
},
|
|
218
|
+
FieldType::DateTime => {
|
|
219
|
+
let s: String = py_value.extract::<String>(py)?;
|
|
220
|
+
FieldValue::DateTime(s)
|
|
221
|
+
},
|
|
222
|
+
FieldType::Date => {
|
|
223
|
+
let s: String = py_value.extract::<String>(py)?;
|
|
224
|
+
FieldValue::Date(s)
|
|
225
|
+
},
|
|
226
|
+
FieldType::Time => {
|
|
227
|
+
let s: String = py_value.extract::<String>(py)?;
|
|
228
|
+
FieldValue::Time(s)
|
|
229
|
+
},
|
|
230
|
+
// Handle other types as needed
|
|
231
|
+
};
|
|
232
|
+
|
|
233
|
+
result.push(RustFieldInfo {
|
|
234
|
+
field_name,
|
|
235
|
+
field_type,
|
|
236
|
+
type_name,
|
|
237
|
+
value,
|
|
238
|
+
});
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
Ok(result)
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
/// A mock-up function that showcases a single iteration over fields
|
|
245
|
+
/// performing these steps:
|
|
246
|
+
/// 1) Handle `default` or `default_factory` if the field is missing
|
|
247
|
+
/// 2) Parse the field's value (e.g. str -> UUID, str -> date, etc.)
|
|
248
|
+
/// 3) Validate the resulting value against the annotated type
|
|
249
|
+
#[pyfunction]
|
|
250
|
+
fn parse_datamodel(py: Python<'_>, dataclass_instance: PyObject) -> PyResult<Vec<(String, bool)>> {
|
|
251
|
+
// Acquire the GIL using `Python::with_gil`
|
|
252
|
+
Python::with_gil(|py| {
|
|
253
|
+
// 1) Get dataclass instance's class
|
|
254
|
+
let dataclass_type: &PyType = dataclass_instance.as_ref(py).get_type();
|
|
255
|
+
|
|
256
|
+
// 2) Get __dataclass_fields__ from the class
|
|
257
|
+
let fields_dict: &PyDict = dataclass_type
|
|
258
|
+
.getattr("__dataclass_fields__")?
|
|
259
|
+
.downcast::<PyDict>()?;
|
|
260
|
+
|
|
261
|
+
// 3) Convert Python fields into a native Rust Vec<RustFieldInfo>
|
|
262
|
+
let field_infos = get_field_info(py, &dataclass_instance, fields_dict)?;
|
|
263
|
+
|
|
264
|
+
// 4) Drop the GIL before parallel processing
|
|
265
|
+
// Note: `Python::with_gil` automatically drops the GIL when the closure ends
|
|
266
|
+
// Hence, no need to explicitly drop `py` here
|
|
267
|
+
|
|
268
|
+
// 5) Perform parallel iteration over `field_infos`
|
|
269
|
+
let results: Vec<(String, bool)> = field_infos
|
|
270
|
+
.into_par_iter()
|
|
271
|
+
.map(|field_info| {
|
|
272
|
+
// Perform parsing and validation purely in Rust
|
|
273
|
+
let is_parsed = field_info.field_type.parse(&field_info.value);
|
|
274
|
+
if !is_parsed {
|
|
275
|
+
return (field_info.field_name, false);
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
let is_valid = field_info.field_type.validate(&field_info.value);
|
|
279
|
+
(field_info.field_name, is_valid)
|
|
280
|
+
})
|
|
281
|
+
.collect();
|
|
282
|
+
|
|
283
|
+
Ok(results)
|
|
284
|
+
})
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
/// Python module declaration
|
|
289
|
+
#[pymodule]
|
|
290
|
+
fn rs_core(_py: Python, m: &PyModule) -> PyResult<()> {
|
|
291
|
+
m.add_function(wrap_pyfunction!(validate_datamodel, m)?)?;
|
|
292
|
+
m.add_function(wrap_pyfunction!(parse_datamodel, m)?)?;
|
|
293
|
+
Ok(())
|
|
294
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "rs_parsers"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
authors = ["Jesus Lara <jesuslarag@gmail.com>"]
|
|
6
|
+
description = "Parallel DataModel Parser and validator using Rust"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
repository = "https://github.com/phenobarbital/python-datamodel"
|
|
9
|
+
|
|
10
|
+
[lib]
|
|
11
|
+
name = "rs_parsers"
|
|
12
|
+
crate-type = ["cdylib"]
|
|
13
|
+
|
|
14
|
+
[dependencies]
|
|
15
|
+
pyo3 = { version = "0.23.3", features = ["generate-import-lib", "extension-module"] }
|
|
16
|
+
rayon = "1.5.3"
|
|
17
|
+
chrono = "0.4.39"
|
|
18
|
+
speedate = "0.15.0"
|
|
19
|
+
uuid = "1.11.0"
|
|
20
|
+
fastuuid = "0.3.0"
|
|
21
|
+
rust_decimal = "1.36"
|
|
22
|
+
rust_decimal_macros = "1.36"
|