arize 8.0.0a2__tar.gz → 8.0.0a4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize-8.0.0a2 → arize-8.0.0a4}/PKG-INFO +165 -9
- {arize-8.0.0a2 → arize-8.0.0a4}/README.md +155 -7
- {arize-8.0.0a2 → arize-8.0.0a4}/pyproject.toml +11 -1
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_exporter/client.py +28 -8
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_exporter/parsers/tracing_data_parser.py +7 -4
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_exporter/validation.py +7 -3
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_flight/client.py +11 -14
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_lazy.py +38 -36
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/client.py +36 -4
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/config.py +37 -3
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/constants/config.py +6 -0
- arize-8.0.0a4/src/arize/constants/ml.py +59 -0
- arize-8.0.0a4/src/arize/constants/model_mapping.json +199 -0
- arize-8.0.0a4/src/arize/exceptions/base.py +106 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/exceptions/models.py +12 -0
- arize-8.0.0a4/src/arize/exceptions/parameters.py +389 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/exceptions/values.py +16 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/logging.py +6 -6
- arize-8.0.0a4/src/arize/models/batch_validation/errors.py +1145 -0
- arize-8.0.0a4/src/arize/models/batch_validation/validator.py +3711 -0
- arize-8.0.0a4/src/arize/models/bounded_executor.py +34 -0
- arize-8.0.0a4/src/arize/models/client.py +807 -0
- arize-8.0.0a4/src/arize/models/stream_validation.py +214 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/client.py +55 -188
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/annotations/annotations_validation.py +8 -4
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/annotations/dataframe_form_validation.py +6 -2
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/annotations/value_validation.py +6 -3
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/common/argument_validation.py +5 -2
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/common/dataframe_form_validation.py +5 -2
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/evals/evals_validation.py +8 -4
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/evals/value_validation.py +8 -4
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/metadata/argument_validation.py +5 -2
- arize-8.0.0a4/src/arize/spans/validation/spans/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/spans/spans_validation.py +8 -4
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/spans/value_validation.py +8 -5
- arize-8.0.0a4/src/arize/types.py +1754 -0
- arize-8.0.0a4/src/arize/utils/__init__.py +0 -0
- arize-8.0.0a4/src/arize/utils/arrow.py +178 -0
- arize-8.0.0a4/src/arize/utils/casting.py +396 -0
- arize-8.0.0a4/src/arize/utils/proto.py +865 -0
- arize-8.0.0a4/src/arize/version.py +1 -0
- arize-8.0.0a2/src/arize/constants/ml.py +0 -57
- arize-8.0.0a2/src/arize/exceptions/base.py +0 -101
- arize-8.0.0a2/src/arize/exceptions/parameters.py +0 -371
- arize-8.0.0a2/src/arize/types.py +0 -1699
- arize-8.0.0a2/src/arize/utils/arrow.py +0 -37
- arize-8.0.0a2/src/arize/utils/proto.py +0 -424
- arize-8.0.0a2/src/arize/version.py +0 -1
- {arize-8.0.0a2 → arize-8.0.0a4}/.gitignore +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/LICENSE.md +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_exporter/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_exporter/parsers/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_flight/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_flight/types.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/api/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/api/datasets_api.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/api/experiments_api.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/api_client.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/api_response.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/configuration.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/exceptions.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/models/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/models/dataset.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/models/dataset_version.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/models/datasets_create201_response.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/models/datasets_create_request.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/models/datasets_list200_response.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/models/datasets_list_examples200_response.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/models/error.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/models/experiment.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/models/experiments_list200_response.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/rest.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/test/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/test/test_dataset.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/test/test_dataset_version.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/test/test_datasets_api.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/test/test_datasets_create201_response.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/test/test_datasets_create_request.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/test/test_datasets_list200_response.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/test/test_datasets_list_examples200_response.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/test/test_error.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/test/test_experiment.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/test/test_experiments_api.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client/test/test_experiments_list200_response.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/api_client_README.md +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/protocol/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/protocol/flight/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/protocol/flight/export_pb2.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/protocol/flight/ingest_pb2.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/protocol/rec/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_generated/protocol/rec/public_pb2.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/constants/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/constants/spans.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/datasets/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/datasets/client.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/exceptions/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/exceptions/auth.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/exceptions/spaces.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/exceptions/types.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/experiments/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/experiments/client.py +0 -0
- {arize-8.0.0a2/src/arize/spans → arize-8.0.0a4/src/arize/models}/__init__.py +0 -0
- {arize-8.0.0a2/src/arize/spans/validation → arize-8.0.0a4/src/arize/models/batch_validation}/__init__.py +0 -0
- {arize-8.0.0a2/src/arize/spans/validation/annotations → arize-8.0.0a4/src/arize/spans}/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/columns.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/conversion.py +0 -0
- {arize-8.0.0a2/src/arize/spans/validation/common → arize-8.0.0a4/src/arize/spans/validation}/__init__.py +0 -0
- {arize-8.0.0a2/src/arize/spans/validation/evals → arize-8.0.0a4/src/arize/spans/validation/annotations}/__init__.py +0 -0
- {arize-8.0.0a2/src/arize/spans/validation/spans → arize-8.0.0a4/src/arize/spans/validation/common}/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/common/errors.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/common/value_validation.py +0 -0
- {arize-8.0.0a2/src/arize/utils → arize-8.0.0a4/src/arize/spans/validation/evals}/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/evals/dataframe_form_validation.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/metadata/__init__.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/metadata/dataframe_form_validation.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/metadata/value_validation.py +0 -0
- {arize-8.0.0a2 → arize-8.0.0a4}/src/arize/spans/validation/spans/dataframe_form_validation.py +0 -0
- /arize-8.0.0a2/src/arize/utils/pandas.py → /arize-8.0.0a4/src/arize/utils/dataframe.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: arize
|
|
3
|
-
Version: 8.0.0a2
|
|
3
|
+
Version: 8.0.0a4
|
|
4
4
|
Summary: A helper library to interact with Arize AI APIs
|
|
5
5
|
Project-URL: Homepage, https://arize.com
|
|
6
6
|
Project-URL: Documentation, https://docs.arize.com/arize
|
|
@@ -26,11 +26,19 @@ Classifier: Topic :: System :: Logging
|
|
|
26
26
|
Classifier: Topic :: System :: Monitoring
|
|
27
27
|
Requires-Python: >=3.10
|
|
28
28
|
Requires-Dist: lazy-imports
|
|
29
|
+
Requires-Dist: numpy>=2.0.0
|
|
29
30
|
Provides-Extra: dev
|
|
30
31
|
Requires-Dist: pytest==8.4.2; extra == 'dev'
|
|
31
32
|
Requires-Dist: ruff==0.13.2; extra == 'dev'
|
|
33
|
+
Provides-Extra: ml-batch
|
|
34
|
+
Requires-Dist: pandas<3,>=1.0.0; extra == 'ml-batch'
|
|
35
|
+
Requires-Dist: protobuf<6,>=4.21.0; extra == 'ml-batch'
|
|
36
|
+
Requires-Dist: pyarrow>=0.15.0; extra == 'ml-batch'
|
|
37
|
+
Requires-Dist: tqdm; extra == 'ml-batch'
|
|
38
|
+
Provides-Extra: ml-stream
|
|
39
|
+
Requires-Dist: protobuf<6,>=4.21.0; extra == 'ml-stream'
|
|
40
|
+
Requires-Dist: requests-futures<2,>=1.0.0; extra == 'ml-stream'
|
|
32
41
|
Provides-Extra: spans
|
|
33
|
-
Requires-Dist: numpy>=2.0.0; extra == 'spans'
|
|
34
42
|
Requires-Dist: openinference-semantic-conventions<1,>=0.1.21; extra == 'spans'
|
|
35
43
|
Requires-Dist: opentelemetry-semantic-conventions<1,>=0.43b0; extra == 'spans'
|
|
36
44
|
Requires-Dist: pandas<3,>=1.0.0; extra == 'spans'
|
|
@@ -112,13 +120,14 @@ tracer_provider = register(
|
|
|
112
120
|
OpenAIInstrumentor().instrument(tracer_provider=tracer_provider)
|
|
113
121
|
```
|
|
114
122
|
|
|
115
|
-
|
|
116
|
-
### Logging Spans, Evaluations, and Annotations
|
|
123
|
+
### Operations on Spans
|
|
117
124
|
|
|
118
125
|
Use `arize.spans` to interact with spans: log spans into Arize, update the span's evaluations, annotations and metadata in bulk.
|
|
119
126
|
|
|
120
127
|
> **WARNING**: This is currently under an alpha release. Install with `pip install arize==8.0.0ax` where the `x` denotes the specific alpha version. Check the [pre-releases](https://pypi.org/project/arize/#history) page in PyPI.
|
|
121
128
|
|
|
129
|
+
#### Logging spans
|
|
130
|
+
|
|
122
131
|
```python
|
|
123
132
|
from arize import ArizeClient
|
|
124
133
|
|
|
@@ -130,22 +139,169 @@ client.spans.log(
|
|
|
130
139
|
space_id=SPACE_ID,
|
|
131
140
|
project_name=PROJECT_NAME,
|
|
132
141
|
dataframe=spans_df,
|
|
133
|
-
evals_df=evals_df, # Optionally pass the evaluations together with the spans
|
|
142
|
+
# evals_df=evals_df, # Optionally pass the evaluations together with the spans
|
|
134
143
|
)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
#### Update spans Evaluations, Annotations, and Metadata
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
from arize import ArizeClient
|
|
150
|
+
|
|
151
|
+
client = ArizeClient(api_key=API_KEY)
|
|
152
|
+
SPACE_ID = "<your-space-id>"
|
|
153
|
+
PROJECT_NAME = "<your-project-name>"
|
|
135
154
|
|
|
136
155
|
client.spans.update_evaluations(
|
|
156
|
+
space_id=SPACE_ID,
|
|
157
|
+
project_name=PROJECT_NAME,
|
|
137
158
|
dataframe=evals_df,
|
|
138
|
-
|
|
159
|
+
# force_http=... # Optionally pass force_http to update evaluations via HTTP instead of gRPC, defaults to False
|
|
139
160
|
)
|
|
140
161
|
|
|
141
162
|
client.spans.update_annotations(
|
|
163
|
+
space_id=SPACE_ID,
|
|
164
|
+
project_name=PROJECT_NAME,
|
|
142
165
|
dataframe=annotations_df,
|
|
143
|
-
project_name="your-llm-project",
|
|
144
166
|
)
|
|
145
167
|
|
|
146
168
|
client.spans.update_metadata(
|
|
147
|
-
|
|
148
|
-
project_name=
|
|
169
|
+
space_id=SPACE_ID,
|
|
170
|
+
project_name=PROJECT_NAME,
|
|
171
|
+
dataframe=metadata_df,
|
|
172
|
+
)
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
#### Exporting spans
|
|
176
|
+
|
|
177
|
+
Use the `export_to_df` or `export_to_parquet` methods to export large amounts of spans from Arize.
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
from arize import ArizeClient
|
|
181
|
+
from datetime import datetime
|
|
182
|
+
|
|
183
|
+
FMT = "%Y-%m-%d"
|
|
184
|
+
start_time = datetime.strptime("2024-01-01",FMT)
|
|
185
|
+
end_time = datetime.strptime("2026-01-01",FMT)
|
|
186
|
+
|
|
187
|
+
client = ArizeClient(api_key=API_KEY)
|
|
188
|
+
SPACE_ID = "<your-space-id>"
|
|
189
|
+
PROJECT_NAME = "<your-project-name>"
|
|
190
|
+
|
|
191
|
+
df = client.spans.export_to_df(
|
|
192
|
+
space_id=SPACE_ID,
|
|
193
|
+
project_name=PROJECT_NAME,
|
|
194
|
+
start_time=start_time,
|
|
195
|
+
end_time=end_time,
|
|
196
|
+
)
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Operations on ML Models
|
|
200
|
+
|
|
201
|
+
Use `arize.models` to interact with ML models: log ML data (training, validation, production) into Arize, either streaming or in batches.
|
|
202
|
+
|
|
203
|
+
> **WARNING**: This is currently under an alpha release. Install with `pip install arize==8.0.0ax` where the `x` denotes the specific alpha version. Check the [pre-releases](https://pypi.org/project/arize/#history) page in PyPI.
|
|
204
|
+
|
|
205
|
+
#### Stream log ML Data for a Classification use-case
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
from arize import ArizeClient
|
|
209
|
+
from arize.types import ModelTypes, Environments
|
|
210
|
+
|
|
211
|
+
client = ArizeClient(api_key=API_KEY)
|
|
212
|
+
SPACE_ID = "<your-space-id>"
|
|
213
|
+
MODEL_NAME = "<your-model-name>"
|
|
214
|
+
|
|
215
|
+
features=...
|
|
216
|
+
embedding_features=...
|
|
217
|
+
|
|
218
|
+
response = client.models.log_stream(
|
|
219
|
+
space_id=SPACE_ID,
|
|
220
|
+
model_name=MODEL_NAME,
|
|
221
|
+
model_type=ModelTypes.SCORE_CATEGORICAL,
|
|
222
|
+
environment=Environments.PRODUCTION,
|
|
223
|
+
prediction_label=("not fraud",0.3),
|
|
224
|
+
actual_label=("fraud",1.0),
|
|
225
|
+
features=features,
|
|
226
|
+
embedding_features=embedding_features,
|
|
227
|
+
)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
#### Log a batch of ML Data for an Object Detection use-case
|
|
231
|
+
|
|
232
|
+
```python
|
|
233
|
+
from arize import ArizeClient
|
|
234
|
+
from arize.types import ModelTypes, Environments
|
|
235
|
+
|
|
236
|
+
client = ArizeClient(api_key=API_KEY)
|
|
237
|
+
SPACE_ID = "<your-space-id>"
|
|
238
|
+
MODEL_NAME = "<your-model-name>"
|
|
239
|
+
MODEL_VERSION = "1.0"
|
|
240
|
+
|
|
241
|
+
from arize.types import Schema, EmbeddingColumnNames, ObjectDetectionColumnNames, ModelTypes, Environments
|
|
242
|
+
|
|
243
|
+
tags = ["drift_type"]
|
|
244
|
+
embedding_feature_column_names = {
|
|
245
|
+
"image_embedding": EmbeddingColumnNames(
|
|
246
|
+
vector_column_name="image_vector", link_to_data_column_name="url"
|
|
247
|
+
)
|
|
248
|
+
}
|
|
249
|
+
object_detection_prediction_column_names = ObjectDetectionColumnNames(
|
|
250
|
+
bounding_boxes_coordinates_column_name="prediction_bboxes",
|
|
251
|
+
categories_column_name="prediction_categories",
|
|
252
|
+
scores_column_name="prediction_scores",
|
|
253
|
+
)
|
|
254
|
+
object_detection_actual_column_names = ObjectDetectionColumnNames(
|
|
255
|
+
bounding_boxes_coordinates_column_name="actual_bboxes",
|
|
256
|
+
categories_column_name="actual_categories",
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
# Define a Schema() object for Arize to pick up data from the correct columns for logging
|
|
260
|
+
schema = Schema(
|
|
261
|
+
prediction_id_column_name="prediction_id",
|
|
262
|
+
timestamp_column_name="prediction_ts",
|
|
263
|
+
tag_column_names=tags,
|
|
264
|
+
embedding_feature_column_names=embedding_feature_column_names,
|
|
265
|
+
object_detection_prediction_column_names=object_detection_prediction_column_names,
|
|
266
|
+
object_detection_actual_column_names=object_detection_actual_column_names,
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
# Logging Production DataFrame
|
|
270
|
+
response = client.models.log_batch(
|
|
271
|
+
space_id=SPACE_ID,
|
|
272
|
+
model_name=MODEL_NAME,
|
|
273
|
+
model_type=ModelTypes.OBJECT_DETECTION,
|
|
274
|
+
dataframe=prod_df,
|
|
275
|
+
schema=schema,
|
|
276
|
+
environment=Environments.PRODUCTION,
|
|
277
|
+
model_version = MODEL_VERSION, # Optionally pass a model version
|
|
278
|
+
)
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
#### Exporting ML Data
|
|
282
|
+
|
|
283
|
+
Use the `export_to_df` or `export_to_parquet` methods to export large amounts of ML data from Arize.
|
|
284
|
+
|
|
285
|
+
```python
|
|
286
|
+
from arize import ArizeClient
|
|
287
|
+
from datetime import datetime
|
|
288
|
+
|
|
289
|
+
FMT = "%Y-%m-%d"
|
|
290
|
+
start_time = datetime.strptime("2024-01-01",FMT)
|
|
291
|
+
end_time = datetime.strptime("2026-01-01",FMT)
|
|
292
|
+
|
|
293
|
+
client = ArizeClient(api_key=API_KEY)
|
|
294
|
+
SPACE_ID = "<your-space-id>"
|
|
295
|
+
MODEL_NAME = "<your-model-name>"
|
|
296
|
+
MODEL_VERSION = "1.0"
|
|
297
|
+
|
|
298
|
+
df = client.models.export_to_df(
|
|
299
|
+
space_id=SPACE_ID,
|
|
300
|
+
model_name=MODEL_NAME,
|
|
301
|
+
environment=Environments.TRAINING,
|
|
302
|
+
model_version=MODEL_VERSION,
|
|
303
|
+
start_time=start_time,
|
|
304
|
+
end_time=end_time,
|
|
149
305
|
)
|
|
150
306
|
```
|
|
151
307
|
|
|
@@ -71,13 +71,14 @@ tracer_provider = register(
|
|
|
71
71
|
OpenAIInstrumentor().instrument(tracer_provider=tracer_provider)
|
|
72
72
|
```
|
|
73
73
|
|
|
74
|
-
|
|
75
|
-
### Logging Spans, Evaluations, and Annotations
|
|
74
|
+
### Operations on Spans
|
|
76
75
|
|
|
77
76
|
Use `arize.spans` to interact with spans: log spans into Arize, update the span's evaluations, annotations and metadata in bulk.
|
|
78
77
|
|
|
79
78
|
> **WARNING**: This is currently under an alpha release. Install with `pip install arize==8.0.0ax` where the `x` denotes the specific alpha version. Check the [pre-releases](https://pypi.org/project/arize/#history) page in PyPI.
|
|
80
79
|
|
|
80
|
+
#### Logging spans
|
|
81
|
+
|
|
81
82
|
```python
|
|
82
83
|
from arize import ArizeClient
|
|
83
84
|
|
|
@@ -89,22 +90,169 @@ client.spans.log(
|
|
|
89
90
|
space_id=SPACE_ID,
|
|
90
91
|
project_name=PROJECT_NAME,
|
|
91
92
|
dataframe=spans_df,
|
|
92
|
-
evals_df=evals_df, # Optionally pass the evaluations together with the spans
|
|
93
|
+
# evals_df=evals_df, # Optionally pass the evaluations together with the spans
|
|
93
94
|
)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
#### Update spans Evaluations, Annotations, and Metadata
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from arize import ArizeClient
|
|
101
|
+
|
|
102
|
+
client = ArizeClient(api_key=API_KEY)
|
|
103
|
+
SPACE_ID = "<your-space-id>"
|
|
104
|
+
PROJECT_NAME = "<your-project-name>"
|
|
94
105
|
|
|
95
106
|
client.spans.update_evaluations(
|
|
107
|
+
space_id=SPACE_ID,
|
|
108
|
+
project_name=PROJECT_NAME,
|
|
96
109
|
dataframe=evals_df,
|
|
97
|
-
|
|
110
|
+
# force_http=... # Optionally pass force_http to update evaluations via HTTP instead of gRPC, defaults to False
|
|
98
111
|
)
|
|
99
112
|
|
|
100
113
|
client.spans.update_annotations(
|
|
114
|
+
space_id=SPACE_ID,
|
|
115
|
+
project_name=PROJECT_NAME,
|
|
101
116
|
dataframe=annotations_df,
|
|
102
|
-
project_name="your-llm-project",
|
|
103
117
|
)
|
|
104
118
|
|
|
105
119
|
client.spans.update_metadata(
|
|
106
|
-
|
|
107
|
-
project_name=
|
|
120
|
+
space_id=SPACE_ID,
|
|
121
|
+
project_name=PROJECT_NAME,
|
|
122
|
+
dataframe=metadata_df,
|
|
123
|
+
)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
#### Exporting spans
|
|
127
|
+
|
|
128
|
+
Use the `export_to_df` or `export_to_parquet` methods to export large amounts of spans from Arize.
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
from arize import ArizeClient
|
|
132
|
+
from datetime import datetime
|
|
133
|
+
|
|
134
|
+
FMT = "%Y-%m-%d"
|
|
135
|
+
start_time = datetime.strptime("2024-01-01",FMT)
|
|
136
|
+
end_time = datetime.strptime("2026-01-01",FMT)
|
|
137
|
+
|
|
138
|
+
client = ArizeClient(api_key=API_KEY)
|
|
139
|
+
SPACE_ID = "<your-space-id>"
|
|
140
|
+
PROJECT_NAME = "<your-project-name>"
|
|
141
|
+
|
|
142
|
+
df = client.spans.export_to_df(
|
|
143
|
+
space_id=SPACE_ID,
|
|
144
|
+
project_name=PROJECT_NAME,
|
|
145
|
+
start_time=start_time,
|
|
146
|
+
end_time=end_time,
|
|
147
|
+
)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Operations on ML Models
|
|
151
|
+
|
|
152
|
+
Use `arize.models` to interact with ML models: log ML data (training, validation, production) into Arize, either streaming or in batches.
|
|
153
|
+
|
|
154
|
+
> **WARNING**: This is currently under an alpha release. Install with `pip install arize==8.0.0ax` where the `x` denotes the specific alpha version. Check the [pre-releases](https://pypi.org/project/arize/#history) page in PyPI.
|
|
155
|
+
|
|
156
|
+
#### Stream log ML Data for a Classification use-case
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
from arize import ArizeClient
|
|
160
|
+
from arize.types import ModelTypes, Environments
|
|
161
|
+
|
|
162
|
+
client = ArizeClient(api_key=API_KEY)
|
|
163
|
+
SPACE_ID = "<your-space-id>"
|
|
164
|
+
MODEL_NAME = "<your-model-name>"
|
|
165
|
+
|
|
166
|
+
features=...
|
|
167
|
+
embedding_features=...
|
|
168
|
+
|
|
169
|
+
response = client.models.log_stream(
|
|
170
|
+
space_id=SPACE_ID,
|
|
171
|
+
model_name=MODEL_NAME,
|
|
172
|
+
model_type=ModelTypes.SCORE_CATEGORICAL,
|
|
173
|
+
environment=Environments.PRODUCTION,
|
|
174
|
+
prediction_label=("not fraud",0.3),
|
|
175
|
+
actual_label=("fraud",1.0),
|
|
176
|
+
features=features,
|
|
177
|
+
embedding_features=embedding_features,
|
|
178
|
+
)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
#### Log a batch of ML Data for an Object Detection use-case
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
from arize import ArizeClient
|
|
185
|
+
from arize.types import ModelTypes, Environments
|
|
186
|
+
|
|
187
|
+
client = ArizeClient(api_key=API_KEY)
|
|
188
|
+
SPACE_ID = "<your-space-id>"
|
|
189
|
+
MODEL_NAME = "<your-model-name>"
|
|
190
|
+
MODEL_VERSION = "1.0"
|
|
191
|
+
|
|
192
|
+
from arize.types import Schema, EmbeddingColumnNames, ObjectDetectionColumnNames, ModelTypes, Environments
|
|
193
|
+
|
|
194
|
+
tags = ["drift_type"]
|
|
195
|
+
embedding_feature_column_names = {
|
|
196
|
+
"image_embedding": EmbeddingColumnNames(
|
|
197
|
+
vector_column_name="image_vector", link_to_data_column_name="url"
|
|
198
|
+
)
|
|
199
|
+
}
|
|
200
|
+
object_detection_prediction_column_names = ObjectDetectionColumnNames(
|
|
201
|
+
bounding_boxes_coordinates_column_name="prediction_bboxes",
|
|
202
|
+
categories_column_name="prediction_categories",
|
|
203
|
+
scores_column_name="prediction_scores",
|
|
204
|
+
)
|
|
205
|
+
object_detection_actual_column_names = ObjectDetectionColumnNames(
|
|
206
|
+
bounding_boxes_coordinates_column_name="actual_bboxes",
|
|
207
|
+
categories_column_name="actual_categories",
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
# Define a Schema() object for Arize to pick up data from the correct columns for logging
|
|
211
|
+
schema = Schema(
|
|
212
|
+
prediction_id_column_name="prediction_id",
|
|
213
|
+
timestamp_column_name="prediction_ts",
|
|
214
|
+
tag_column_names=tags,
|
|
215
|
+
embedding_feature_column_names=embedding_feature_column_names,
|
|
216
|
+
object_detection_prediction_column_names=object_detection_prediction_column_names,
|
|
217
|
+
object_detection_actual_column_names=object_detection_actual_column_names,
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
# Logging Production DataFrame
|
|
221
|
+
response = client.models.log_batch(
|
|
222
|
+
space_id=SPACE_ID,
|
|
223
|
+
model_name=MODEL_NAME,
|
|
224
|
+
model_type=ModelTypes.OBJECT_DETECTION,
|
|
225
|
+
dataframe=prod_df,
|
|
226
|
+
schema=schema,
|
|
227
|
+
environment=Environments.PRODUCTION,
|
|
228
|
+
model_version = MODEL_VERSION, # Optionally pass a model version
|
|
229
|
+
)
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
#### Exporting ML Data
|
|
233
|
+
|
|
234
|
+
Use the `export_to_df` or `export_to_parquet` methods to export large amounts of ML data from Arize.
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
from arize import ArizeClient
|
|
238
|
+
from datetime import datetime
|
|
239
|
+
|
|
240
|
+
FMT = "%Y-%m-%d"
|
|
241
|
+
start_time = datetime.strptime("2024-01-01",FMT)
|
|
242
|
+
end_time = datetime.strptime("2026-01-01",FMT)
|
|
243
|
+
|
|
244
|
+
client = ArizeClient(api_key=API_KEY)
|
|
245
|
+
SPACE_ID = "<your-space-id>"
|
|
246
|
+
MODEL_NAME = "<your-model-name>"
|
|
247
|
+
MODEL_VERSION = "1.0"
|
|
248
|
+
|
|
249
|
+
df = client.models.export_to_df(
|
|
250
|
+
space_id=SPACE_ID,
|
|
251
|
+
model_name=MODEL_NAME,
|
|
252
|
+
environment=Environments.TRAINING,
|
|
253
|
+
model_version=MODEL_VERSION,
|
|
254
|
+
start_time=start_time,
|
|
255
|
+
end_time=end_time,
|
|
108
256
|
)
|
|
109
257
|
```
|
|
110
258
|
|
|
@@ -35,6 +35,7 @@ classifiers = [
|
|
|
35
35
|
]
|
|
36
36
|
dependencies = [
|
|
37
37
|
"lazy-imports",
|
|
38
|
+
"numpy>=2.0.0",
|
|
38
39
|
# "requests_futures==1.0.0",
|
|
39
40
|
# "googleapis_common_protos>=1.51.0,<2",
|
|
40
41
|
# "protobuf>=4.21.0,<6",
|
|
@@ -51,7 +52,6 @@ dev = [
|
|
|
51
52
|
"ruff==0.13.2",
|
|
52
53
|
]
|
|
53
54
|
spans = [
|
|
54
|
-
"numpy>=2.0.0",
|
|
55
55
|
"openinference-semantic-conventions>=0.1.21, <1",
|
|
56
56
|
"opentelemetry-semantic-conventions>=0.43b0, <1",
|
|
57
57
|
"pandas>=1.0.0,<3",
|
|
@@ -59,6 +59,16 @@ spans = [
|
|
|
59
59
|
"pyarrow>=0.15.0",
|
|
60
60
|
"tqdm",
|
|
61
61
|
]
|
|
62
|
+
ml-stream = [
|
|
63
|
+
"requests_futures>=1.0.0, <2",
|
|
64
|
+
"protobuf>=4.21.0,<6",
|
|
65
|
+
]
|
|
66
|
+
ml-batch = [
|
|
67
|
+
"pandas>=1.0.0,<3",
|
|
68
|
+
"pyarrow>=0.15.0",
|
|
69
|
+
"protobuf>=4.21.0,<6",
|
|
70
|
+
"tqdm",
|
|
71
|
+
]
|
|
62
72
|
# datasets-experiments = [
|
|
63
73
|
# "pydantic",
|
|
64
74
|
# ]
|
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
# type: ignore[pb2]
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
|
|
4
2
|
import logging
|
|
5
3
|
from dataclasses import dataclass
|
|
6
4
|
from datetime import datetime
|
|
@@ -14,17 +12,14 @@ from google.protobuf.wrappers_pb2 import Int64Value
|
|
|
14
12
|
from pyarrow import flight
|
|
15
13
|
from tqdm import tqdm
|
|
16
14
|
|
|
17
|
-
from arize._exporter.parsers.tracing_data_parser import (
|
|
18
|
-
OtelTracingDataTransformer,
|
|
19
|
-
)
|
|
20
15
|
from arize._exporter.validation import (
|
|
21
16
|
validate_input_type,
|
|
22
|
-
validate_input_value,
|
|
23
17
|
validate_start_end_time,
|
|
24
18
|
)
|
|
25
19
|
from arize._generated.protocol.flight import export_pb2
|
|
20
|
+
from arize.logging import CtxAdapter
|
|
26
21
|
from arize.types import Environments, SimilaritySearchParams
|
|
27
|
-
from arize.utils.
|
|
22
|
+
from arize.utils.dataframe import reset_dataframe_index
|
|
28
23
|
from arize.utils.proto import get_pb_flight_doput_request
|
|
29
24
|
|
|
30
25
|
logger = logging.getLogger(__name__)
|
|
@@ -124,6 +119,10 @@ class ArizeExportClient:
|
|
|
124
119
|
df.drop(null_columns, axis=1, inplace=True)
|
|
125
120
|
|
|
126
121
|
if environment == Environments.TRACING:
|
|
122
|
+
from arize._exporter.parsers.tracing_data_parser import (
|
|
123
|
+
OtelTracingDataTransformer,
|
|
124
|
+
)
|
|
125
|
+
|
|
127
126
|
# by default, transform the exported tracing data so that it's
|
|
128
127
|
# easier to work with in Python
|
|
129
128
|
df = OtelTracingDataTransformer().transform(df)
|
|
@@ -238,6 +237,27 @@ class ArizeExportClient:
|
|
|
238
237
|
columns: List | None = None,
|
|
239
238
|
stream_chunk_size: int | None = None,
|
|
240
239
|
) -> Tuple[flight.FlightStreamReader, int]:
|
|
240
|
+
# Bind common context for this operation
|
|
241
|
+
log = CtxAdapter(
|
|
242
|
+
logger,
|
|
243
|
+
{
|
|
244
|
+
"component": "exporter",
|
|
245
|
+
"operation": "export_to_df",
|
|
246
|
+
"space_id": space_id,
|
|
247
|
+
"model_id": model_id,
|
|
248
|
+
"environment": environment.name,
|
|
249
|
+
"model_version": model_version,
|
|
250
|
+
"batch_id": batch_id,
|
|
251
|
+
"include_actuals": include_actuals,
|
|
252
|
+
"where": where,
|
|
253
|
+
"columns": columns,
|
|
254
|
+
"similarity_search_params": similarity_search_params,
|
|
255
|
+
"stream_chunk_size": stream_chunk_size,
|
|
256
|
+
"start_time": start_time,
|
|
257
|
+
"end_time": end_time,
|
|
258
|
+
},
|
|
259
|
+
)
|
|
260
|
+
log.debug("Getting stream reader...")
|
|
241
261
|
validate_input_type(space_id, "space_id", str)
|
|
242
262
|
validate_input_type(model_id, "model_id", str)
|
|
243
263
|
validate_input_type(environment, "environment", Environments)
|
|
@@ -281,7 +301,7 @@ class ArizeExportClient:
|
|
|
281
301
|
json_format.MessageToJson(query_descriptor) # type: ignore
|
|
282
302
|
),
|
|
283
303
|
)
|
|
284
|
-
logger.
|
|
304
|
+
logger.info("Fetching data...")
|
|
285
305
|
|
|
286
306
|
if flight_info.total_records == 0:
|
|
287
307
|
logger.warning("Query returns no data")
|
|
@@ -1,9 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import json
|
|
2
4
|
import logging
|
|
3
|
-
from typing import List
|
|
4
|
-
|
|
5
|
-
import numpy as np
|
|
6
|
-
import pandas as pd
|
|
5
|
+
from typing import TYPE_CHECKING, List
|
|
7
6
|
|
|
8
7
|
from arize.spans.columns import (
|
|
9
8
|
SPAN_ATTRIBUTES_EMBEDDING_EMBEDDINGS_COL,
|
|
@@ -19,6 +18,10 @@ from arize.spans.columns import (
|
|
|
19
18
|
SPAN_START_TIME_COL,
|
|
20
19
|
)
|
|
21
20
|
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
import numpy as np
|
|
23
|
+
import pandas as pd
|
|
24
|
+
|
|
22
25
|
logger = logging.getLogger(__name__)
|
|
23
26
|
|
|
24
27
|
|
|
@@ -1,5 +1,9 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from datetime import datetime
|
|
3
7
|
|
|
4
8
|
|
|
5
9
|
def validate_input_type(
|
|
@@ -35,6 +39,6 @@ def validate_input_value(
|
|
|
35
39
|
)
|
|
36
40
|
|
|
37
41
|
|
|
38
|
-
def validate_start_end_time(start_time, end_time: datetime) -> None:
|
|
42
|
+
def validate_start_end_time(start_time: datetime, end_time: datetime) -> None:
|
|
39
43
|
if start_time >= end_time:
|
|
40
44
|
raise ValueError("start_time must be before end_time")
|
|
@@ -2,29 +2,28 @@
|
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
4
|
import base64
|
|
5
|
+
import logging
|
|
5
6
|
from dataclasses import dataclass, field
|
|
6
|
-
from typing import Any, Dict, List, Tuple
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Tuple
|
|
7
8
|
|
|
8
|
-
import pyarrow as pa
|
|
9
9
|
from google.protobuf import json_format
|
|
10
10
|
from pyarrow import flight
|
|
11
11
|
|
|
12
|
-
from arize._flight.types import FlightRequestType
|
|
13
12
|
from arize._generated.protocol.flight.ingest_pb2 import (
|
|
14
13
|
WriteSpanAnnotationResponse,
|
|
15
14
|
WriteSpanAttributesMetadataResponse,
|
|
16
15
|
WriteSpanEvaluationResponse,
|
|
17
16
|
)
|
|
18
17
|
from arize.config import get_python_version
|
|
19
|
-
from arize.constants.config import (
|
|
20
|
-
DEFAULT_FLIGHT_HOST,
|
|
21
|
-
DEFAULT_FLIGHT_PORT,
|
|
22
|
-
DEFAULT_FLIGHT_TRANSPORT_SCHEME,
|
|
23
|
-
)
|
|
24
18
|
from arize.logging import log_a_list
|
|
25
19
|
from arize.utils.proto import get_pb_flight_doput_request, get_pb_schema_tracing
|
|
26
20
|
from arize.version import __version__
|
|
27
21
|
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
import pyarrow as pa
|
|
24
|
+
|
|
25
|
+
from arize._flight.client import FlightRequestType
|
|
26
|
+
|
|
28
27
|
BytesPair = Tuple[bytes, bytes]
|
|
29
28
|
Headers = List[BytesPair]
|
|
30
29
|
WriteSpanResponse = (
|
|
@@ -32,7 +31,6 @@ WriteSpanResponse = (
|
|
|
32
31
|
| WriteSpanAnnotationResponse
|
|
33
32
|
| WriteSpanAttributesMetadataResponse
|
|
34
33
|
)
|
|
35
|
-
import logging
|
|
36
34
|
|
|
37
35
|
logger = logging.getLogger(__name__)
|
|
38
36
|
|
|
@@ -40,10 +38,10 @@ logger = logging.getLogger(__name__)
|
|
|
40
38
|
@dataclass(frozen=True)
|
|
41
39
|
class ArizeFlightClient:
|
|
42
40
|
api_key: str = field(repr=False)
|
|
43
|
-
host: str
|
|
44
|
-
port: int
|
|
45
|
-
scheme: str
|
|
46
|
-
request_verify: bool
|
|
41
|
+
host: str
|
|
42
|
+
port: int
|
|
43
|
+
scheme: str
|
|
44
|
+
request_verify: bool
|
|
47
45
|
|
|
48
46
|
# internal cache for the underlying FlightClient
|
|
49
47
|
_client: flight.FlightClient | None = field(
|
|
@@ -54,7 +52,6 @@ class ArizeFlightClient:
|
|
|
54
52
|
|
|
55
53
|
@property
|
|
56
54
|
def headers(self) -> Headers:
|
|
57
|
-
# Keep the typing simple: (bytes, bytes)
|
|
58
55
|
return [
|
|
59
56
|
(b"origin", b"arize-logging-client"),
|
|
60
57
|
(b"auth-token-bin", str(self.api_key).encode("utf-8")),
|