lmnr 0.6.16__py3-none-any.whl → 0.7.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. lmnr/__init__.py +6 -15
  2. lmnr/cli/__init__.py +270 -0
  3. lmnr/cli/datasets.py +371 -0
  4. lmnr/{cli.py → cli/evals.py} +20 -102
  5. lmnr/cli/rules.py +42 -0
  6. lmnr/opentelemetry_lib/__init__.py +9 -2
  7. lmnr/opentelemetry_lib/decorators/__init__.py +274 -168
  8. lmnr/opentelemetry_lib/litellm/__init__.py +352 -38
  9. lmnr/opentelemetry_lib/litellm/utils.py +82 -0
  10. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py +849 -0
  11. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/config.py +13 -0
  12. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_emitter.py +211 -0
  13. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_models.py +41 -0
  14. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/span_utils.py +401 -0
  15. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/streaming.py +425 -0
  16. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/utils.py +332 -0
  17. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/version.py +1 -0
  18. lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/__init__.py +451 -0
  19. lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/proxy.py +144 -0
  20. lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_agent/__init__.py +100 -0
  21. lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/__init__.py +476 -0
  22. lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/utils.py +12 -0
  23. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +191 -129
  24. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/schema_utils.py +26 -0
  25. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/utils.py +126 -41
  26. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py +488 -0
  27. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/config.py +8 -0
  28. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_emitter.py +143 -0
  29. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_models.py +41 -0
  30. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/span_utils.py +229 -0
  31. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/utils.py +92 -0
  32. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/version.py +1 -0
  33. lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/__init__.py +381 -0
  34. lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/utils.py +36 -0
  35. lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py +16 -16
  36. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/__init__.py +61 -0
  37. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +472 -0
  38. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +1185 -0
  39. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +305 -0
  40. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/config.py +16 -0
  41. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +312 -0
  42. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_emitter.py +100 -0
  43. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
  44. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +68 -0
  45. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/utils.py +197 -0
  46. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v0/__init__.py +176 -0
  47. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/__init__.py +368 -0
  48. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +325 -0
  49. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +135 -0
  50. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +786 -0
  51. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/version.py +1 -0
  52. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openhands_ai/__init__.py +388 -0
  53. lmnr/opentelemetry_lib/opentelemetry/instrumentation/opentelemetry/__init__.py +69 -0
  54. lmnr/opentelemetry_lib/opentelemetry/instrumentation/skyvern/__init__.py +59 -61
  55. lmnr/opentelemetry_lib/opentelemetry/instrumentation/threading/__init__.py +197 -0
  56. lmnr/opentelemetry_lib/tracing/__init__.py +119 -18
  57. lmnr/opentelemetry_lib/tracing/_instrument_initializers.py +124 -25
  58. lmnr/opentelemetry_lib/tracing/attributes.py +4 -0
  59. lmnr/opentelemetry_lib/tracing/context.py +200 -0
  60. lmnr/opentelemetry_lib/tracing/exporter.py +109 -15
  61. lmnr/opentelemetry_lib/tracing/instruments.py +22 -5
  62. lmnr/opentelemetry_lib/tracing/processor.py +128 -30
  63. lmnr/opentelemetry_lib/tracing/span.py +398 -0
  64. lmnr/opentelemetry_lib/tracing/tracer.py +40 -1
  65. lmnr/opentelemetry_lib/tracing/utils.py +62 -0
  66. lmnr/opentelemetry_lib/utils/package_check.py +9 -0
  67. lmnr/opentelemetry_lib/utils/wrappers.py +11 -0
  68. lmnr/sdk/browser/background_send_events.py +158 -0
  69. lmnr/sdk/browser/browser_use_cdp_otel.py +100 -0
  70. lmnr/sdk/browser/browser_use_otel.py +12 -12
  71. lmnr/sdk/browser/bubus_otel.py +71 -0
  72. lmnr/sdk/browser/cdp_utils.py +518 -0
  73. lmnr/sdk/browser/inject_script.js +514 -0
  74. lmnr/sdk/browser/patchright_otel.py +18 -44
  75. lmnr/sdk/browser/playwright_otel.py +104 -187
  76. lmnr/sdk/browser/pw_utils.py +249 -210
  77. lmnr/sdk/browser/recorder/record.umd.min.cjs +84 -0
  78. lmnr/sdk/browser/utils.py +1 -1
  79. lmnr/sdk/client/asynchronous/async_client.py +47 -15
  80. lmnr/sdk/client/asynchronous/resources/__init__.py +2 -7
  81. lmnr/sdk/client/asynchronous/resources/browser_events.py +1 -0
  82. lmnr/sdk/client/asynchronous/resources/datasets.py +131 -0
  83. lmnr/sdk/client/asynchronous/resources/evals.py +122 -18
  84. lmnr/sdk/client/asynchronous/resources/evaluators.py +85 -0
  85. lmnr/sdk/client/asynchronous/resources/tags.py +4 -10
  86. lmnr/sdk/client/synchronous/resources/__init__.py +2 -2
  87. lmnr/sdk/client/synchronous/resources/datasets.py +131 -0
  88. lmnr/sdk/client/synchronous/resources/evals.py +83 -17
  89. lmnr/sdk/client/synchronous/resources/evaluators.py +85 -0
  90. lmnr/sdk/client/synchronous/resources/tags.py +4 -10
  91. lmnr/sdk/client/synchronous/sync_client.py +47 -15
  92. lmnr/sdk/datasets/__init__.py +94 -0
  93. lmnr/sdk/datasets/file_utils.py +91 -0
  94. lmnr/sdk/decorators.py +103 -23
  95. lmnr/sdk/evaluations.py +122 -33
  96. lmnr/sdk/laminar.py +816 -333
  97. lmnr/sdk/log.py +7 -2
  98. lmnr/sdk/types.py +124 -143
  99. lmnr/sdk/utils.py +115 -2
  100. lmnr/version.py +1 -1
  101. {lmnr-0.6.16.dist-info → lmnr-0.7.26.dist-info}/METADATA +71 -78
  102. lmnr-0.7.26.dist-info/RECORD +116 -0
  103. lmnr-0.7.26.dist-info/WHEEL +4 -0
  104. lmnr-0.7.26.dist-info/entry_points.txt +3 -0
  105. lmnr/opentelemetry_lib/tracing/context_properties.py +0 -65
  106. lmnr/sdk/browser/rrweb/rrweb.umd.min.cjs +0 -98
  107. lmnr/sdk/client/asynchronous/resources/agent.py +0 -329
  108. lmnr/sdk/client/synchronous/resources/agent.py +0 -323
  109. lmnr/sdk/datasets.py +0 -60
  110. lmnr-0.6.16.dist-info/LICENSE +0 -75
  111. lmnr-0.6.16.dist-info/RECORD +0 -61
  112. lmnr-0.6.16.dist-info/WHEEL +0 -4
  113. lmnr-0.6.16.dist-info/entry_points.txt +0 -3
lmnr/__init__.py CHANGED
@@ -3,29 +3,21 @@ from .sdk.client.asynchronous.async_client import AsyncLaminarClient
 from .sdk.datasets import EvaluationDataset, LaminarDataset
 from .sdk.evaluations import evaluate
 from .sdk.laminar import Laminar
-from .sdk.types import (
-    AgentOutput,
-    FinalOutputChunkContent,
-    HumanEvaluator,
-    RunAgentResponseChunk,
-    StepChunkContent,
-    TracingLevel,
-)
+from .sdk.types import SessionRecordingOptions, MaskInputOptions
+from .sdk.types import HumanEvaluator
 from .sdk.decorators import observe
 from .sdk.types import LaminarSpanContext
 from .opentelemetry_lib.litellm import LaminarLiteLLMCallback
 from .opentelemetry_lib.tracing.attributes import Attributes
 from .opentelemetry_lib.tracing.instruments import Instruments
 from .opentelemetry_lib.tracing.processor import LaminarSpanProcessor
+from .opentelemetry_lib.tracing.span import LaminarSpan
 from .opentelemetry_lib.tracing.tracer import get_laminar_tracer_provider, get_tracer
-from opentelemetry.trace import use_span
 
 __all__ = [
-    "AgentOutput",
     "AsyncLaminarClient",
     "Attributes",
     "EvaluationDataset",
-    "FinalOutputChunkContent",
     "HumanEvaluator",
     "Instruments",
     "Laminar",
@@ -34,12 +26,11 @@ __all__ = [
     "LaminarLiteLLMCallback",
     "LaminarSpanContext",
     "LaminarSpanProcessor",
-    "RunAgentResponseChunk",
-    "StepChunkContent",
-    "TracingLevel",
+    "LaminarSpan",
     "get_laminar_tracer_provider",
     "get_tracer",
     "evaluate",
     "observe",
-    "use_span",
+    "SessionRecordingOptions",
+    "MaskInputOptions",
 ]
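
The net effect for the package root: the agent types (AgentOutput, RunAgentResponseChunk, StepChunkContent, FinalOutputChunkContent) and TracingLevel are no longer exported, the re-export of opentelemetry's use_span is dropped, and SessionRecordingOptions, MaskInputOptions, and LaminarSpan are new. A minimal sketch of the new import surface, assuming only the names in __all__ above; Laminar.initialize() and the @observe() decorator are the usual lmnr entry points, and the fields of the two options types are not shown in this diff:

    # Sketch only: imports mirror the new __all__; the option types'
    # fields are not part of this diff, so none are passed here.
    from lmnr import Laminar, LaminarSpan, MaskInputOptions, SessionRecordingOptions, observe

    Laminar.initialize()  # assumed to read LMNR_PROJECT_API_KEY from the environment

    @observe()
    def handler(query: str) -> str:
        return query.upper()
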
lmnr/cli/__init__.py ADDED
@@ -0,0 +1,270 @@
+from argparse import ArgumentParser, _SubParsersAction
+import asyncio
+
+from lmnr.cli.datasets import handle_datasets_command
+from lmnr.cli.evals import run_evaluation
+from lmnr.cli.rules import add_cursor_rules
+from lmnr.sdk.log import get_default_logger
+from lmnr.sdk.utils import from_env
+
+LOG = get_default_logger(__name__)
+EVAL_DIR = "evals"
+DEFAULT_DATASET_PULL_BATCH_SIZE = 100
+DEFAULT_DATASET_PUSH_BATCH_SIZE = 100
+
+
+def setup_eval_parser(subparsers: _SubParsersAction) -> None:
+    """Setup the eval subcommand parser."""
+    parser_eval = subparsers.add_parser(
+        "eval",
+        description="Run an evaluation",
+        help="Run an evaluation",
+    )
+    parser_eval.add_argument(
+        "file",
+        nargs="*",
+        help="Files or a file containing the evaluation to run. "
+        + "If no file name is provided, all evaluation files in the `evals` directory are run as long "
+        + "as they match *_eval.py or eval_*.py",
+        default=[],
+    )
+    parser_eval.add_argument(
+        "--continue-on-error",
+        action="store_true",
+        default=False,
+        help="Continue execution upon errors",
+    )
+    parser_eval.add_argument(
+        "--output-file",
+        help="Output file to write the results to. Outputs are written in JSON format.",
+        nargs="?",
+    )
+
+
+def setup_add_cursor_rules_parser(subparsers: _SubParsersAction) -> None:
+    """Setup the add-cursor-rules subcommand parser."""
+    subparsers.add_parser(
+        "add-cursor-rules",
+        description="Download laminar.mdc file and add it to .cursor/rules",
+        help="Download laminar.mdc file and add it to .cursor/rules",
+    )
+
+
+def setup_laminar_args(parser: ArgumentParser) -> None:
+    """Setup the laminar arguments parser."""
+    parser.add_argument(
+        "--project-api-key",
+        help="[Optional] Project API key to use for the command. "
+        + "If no project API key is provided, the project API key will be read "
+        + "from the environment variable LMNR_PROJECT_API_KEY.",
+        default=from_env("LMNR_PROJECT_API_KEY"),
+    )
+    parser.add_argument(
+        "--base-url",
+        help="[Optional] Base URL to use for the command. "
+        + "If no base URL is provided, the base URL will be read from the "
+        + "'LMNR_BASE_URL' environment variable or we default to 'https://api.lmnr.ai'.",
+        default=from_env("LMNR_BASE_URL") or "https://api.lmnr.ai",
+    )
+    parser.add_argument(
+        "--port",
+        help="[Optional] Port to use for the command. "
+        + "If no port is provided, the port defaults to '443'.",
+        type=int,
+    )
+
+
+def setup_datasets_list_parser(subparsers: _SubParsersAction) -> None:
+    """Setup the datasets list subcommand parser."""
+    subparsers.add_parser(
+        "list",
+        description="List datasets",
+        help="List datasets",
+    )
+
+
+def setup_datasets_push_parser(subparsers: _SubParsersAction) -> None:
+    """Setup the datasets push subcommand parser."""
+    parser_datasets_push: ArgumentParser = subparsers.add_parser(
+        "push",
+        description="Push datapoints to an existing dataset",
+        help="Push datapoints to an existing dataset",
+    )
+    parser_datasets_push.add_argument(
+        "--name",
+        "-n",
+        help="Name of the dataset to push data to. Exactly one of name or id must be provided.",
+        default=None,
+    )
+    parser_datasets_push.add_argument(
+        "--id",
+        help="ID of the dataset to push data to. Exactly one of name or id must be provided.",
+        default=None,
+    )
+    parser_datasets_push.add_argument(
+        "paths",
+        nargs="*",
+        help="Paths to the files or directories containing the data to push to the dataset. "
+        + "Supported formats: JSON, CSV, JSONL",
+    )
+    parser_datasets_push.add_argument(
+        "-r",
+        "--recursive",
+        action="store_true",
+        default=False,
+        help="Recursively read all files in the directories and their subdirectories.",
+    )
+    parser_datasets_push.add_argument(
+        "--batch-size",
+        type=int,
+        help="Batch size to push data in. If no batch size is provided, "
+        + f"data is pushed in batches of '{DEFAULT_DATASET_PUSH_BATCH_SIZE}'.",
+        default=DEFAULT_DATASET_PUSH_BATCH_SIZE,
+    )
+
+
+def setup_datasets_pull_parser(subparsers: _SubParsersAction) -> None:
+    """Setup the datasets pull subcommand parser."""
+    parser_datasets_pull: ArgumentParser = subparsers.add_parser(
+        "pull",
+        description="Pull data from a dataset",
+        help="Pull data from a dataset",
+    )
+    parser_datasets_pull.add_argument(
+        "--name",
+        "-n",
+        help="Name of the dataset to pull data from",
+        default=None,
+    )
+    parser_datasets_pull.add_argument(
+        "--id",
+        help="ID of the dataset to pull data from",
+        default=None,
+    )
+    parser_datasets_pull.add_argument(
+        "output_path",
+        help="Path to the file to save the data to. "
+        + "If no path is provided, data is printed to the console in the format "
+        + "specified by '--output-format'.",
+        nargs="?",
+    )
+    parser_datasets_pull.add_argument(
+        "--output-format",
+        choices=["json", "csv", "jsonl"],
+        help="Output format to save the data to. "
+        + "If no format is provided, it is inferred from the file extension.",
+    )
+    parser_datasets_pull.add_argument(
+        "--batch-size",
+        type=int,
+        help="Batch size to pull data in. If no batch size is provided, "
+        + f"data is pulled in batches of '{DEFAULT_DATASET_PULL_BATCH_SIZE}'.",
+        default=DEFAULT_DATASET_PULL_BATCH_SIZE,
+    )
+    parser_datasets_pull.add_argument(
+        "--limit",
+        type=int,
+        help="Limit the number of data points to pull. "
+        + "If no limit is provided, all data points are pulled.",
+    )
+    parser_datasets_pull.add_argument(
+        "--offset",
+        type=int,
+        help="Offset the number of data points to pull. "
+        + "If no offset is provided, data is pulled from the beginning.",
+    )
+
+
+def setup_datasets_create_parser(subparsers: _SubParsersAction) -> None:
+    """Setup the datasets create subcommand parser."""
+    parser_datasets_create: ArgumentParser = subparsers.add_parser(
+        "create",
+        description="Create a dataset from input files and download it in Laminar format",
+        help="Create a dataset from input files and download it in Laminar format",
+    )
+    parser_datasets_create.add_argument(
+        "name",
+        help="Name of the dataset to create",
+    )
+    parser_datasets_create.add_argument(
+        "paths",
+        nargs="+",
+        help="Paths to the files or directories containing the data to push to the dataset. "
+        + "Supported formats: JSON, CSV, JSONL",
+    )
+    parser_datasets_create.add_argument(
+        "-o",
+        "--output-file",
+        required=True,
+        help="Path to the file to save the pulled data to",
+    )
+    parser_datasets_create.add_argument(
+        "--output-format",
+        choices=["json", "csv", "jsonl"],
+        help="Output format to save the data to. "
+        + "If no format is provided, it is inferred from the output file extension.",
+    )
+    parser_datasets_create.add_argument(
+        "-r",
+        "--recursive",
+        action="store_true",
+        default=False,
+        help="Recursively read all files in the directories and their subdirectories.",
+    )
+    parser_datasets_create.add_argument(
+        "--batch-size",
+        type=int,
+        help="Batch size to push/pull data in. If no batch size is provided, "
+        + f"data is processed in batches of '{DEFAULT_DATASET_PUSH_BATCH_SIZE}'.",
+        default=DEFAULT_DATASET_PUSH_BATCH_SIZE,
+    )
+
+
+def setup_datasets_parser(subparsers: _SubParsersAction) -> None:
+    """Setup the datasets subcommand parser and its subcommands."""
+    parser_datasets: ArgumentParser = subparsers.add_parser(
+        "datasets",
+        description="Manage datasets",
+        help="Manage datasets",
+    )
+
+    setup_laminar_args(parser_datasets)
+
+    parser_datasets_subparsers = parser_datasets.add_subparsers(
+        title="command",
+        dest="command",
+    )
+
+    # Setup all dataset subcommands
+    setup_datasets_list_parser(parser_datasets_subparsers)
+    setup_datasets_push_parser(parser_datasets_subparsers)
+    setup_datasets_pull_parser(parser_datasets_subparsers)
+    setup_datasets_create_parser(parser_datasets_subparsers)
+
+
+def cli() -> None:
+    """Main CLI entry point."""
+    parser = ArgumentParser(
+        prog="lmnr",
+        description="CLI for Laminar. "
+        + "Call `lmnr [subcommand] --help` for more information on each subcommand.",
+    )
+
+    subparsers = parser.add_subparsers(title="subcommands", dest="subcommand")
+
+    # Setup all subcommand parsers
+    setup_eval_parser(subparsers)
+    setup_add_cursor_rules_parser(subparsers)
+    setup_datasets_parser(subparsers)
+
+    # Parse arguments and dispatch to appropriate handler
+    parsed = parser.parse_args()
+
+    if parsed.subcommand == "eval":
+        asyncio.run(run_evaluation(parsed))
+    elif parsed.subcommand == "add-cursor-rules":
+        add_cursor_rules()
+    elif parsed.subcommand == "datasets":
+        asyncio.run(handle_datasets_command(parsed))
+    else:
+        parser.print_help()
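
cli() reads arguments from sys.argv and dispatches: eval and datasets map to coroutines run with asyncio.run, add-cursor-rules is synchronous, and anything else prints help. A minimal sketch of driving the dispatcher in-process under those assumptions; the dataset name and output path are placeholders:

    # Sketch only: cli() parses sys.argv, so patch it before calling.
    import sys

    from lmnr.cli import cli

    sys.argv = ["lmnr", "datasets", "pull", "--name", "my-dataset", "out.jsonl"]
    cli()  # dispatches to handle_datasets_command via asyncio.run
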
lmnr/cli/datasets.py ADDED
@@ -0,0 +1,371 @@
+from argparse import Namespace
+from pathlib import Path
+from typing import Any
+
+import csv
+import sys
+
+import orjson
+
+from lmnr.sdk.client.asynchronous.async_client import AsyncLaminarClient
+from lmnr.sdk.datasets.file_utils import load_from_paths, parse_paths
+from lmnr.sdk.log import get_default_logger
+from lmnr.sdk.types import Datapoint
+
+LOG = get_default_logger(__name__, verbose=False)
+DEFAULT_DATASET_PULL_BATCH_SIZE = 100
+DEFAULT_DATASET_PUSH_BATCH_SIZE = 100
+
+
+def _dump_json(data: Any, do_indent: bool = True) -> str:
+    return orjson.dumps(
+        data,
+        option=(orjson.OPT_INDENT_2 if do_indent else 0)
+        | orjson.OPT_SERIALIZE_DATACLASS
+        | orjson.OPT_SERIALIZE_UUID
+        | orjson.OPT_UTC_Z
+        | orjson.OPT_NON_STR_KEYS,
+    ).decode()
+
+
+async def _pull_all_data(
+    client: AsyncLaminarClient,
+    identifier: dict,
+    batch_size: int,
+    offset: int = 0,
+    limit: int | None = None,
+) -> list[Datapoint]:
+    """
+    Pull all data from a dataset. This function does not close the client.
+
+    Args:
+        client: The AsyncLaminarClient instance
+        identifier: Dict with either 'name' or 'id' key
+        batch_size: Size of batches to pull
+        offset: Starting offset for pulling data
+        limit: Maximum number of items to pull (None for all)
+
+    Returns:
+        List of all pulled datapoints
+    """
+    has_more = True
+    current_offset = offset
+    stop_at = offset + limit if limit else None
+
+    result: list[Datapoint] = []
+    while has_more and (stop_at is None or current_offset < stop_at):
+        data = await client.datasets.pull(
+            **identifier,
+            offset=current_offset,
+            limit=batch_size,
+        )
+        result.extend(data.items)
+        if stop_at is not None and current_offset + batch_size >= stop_at:
+            has_more = False
+        elif (
+            data.total_count is not None
+            and current_offset + batch_size >= data.total_count
+        ):
+            has_more = False
+        current_offset += batch_size
+
+    if limit is not None:
+        return result[:limit]
+    return result
+
+
+def _write_data_to_file(
+    data: list[Datapoint],
+    output_path: Path,
+    output_format: str | None = None,
+) -> bool:
+    """
+    Write datapoints to a file.
+
+    Args:
+        data: List of datapoints to write
+        output_path: Path to write the file to
+        output_format: Optional format override (json, csv, jsonl)
+
+    Returns:
+        True if successful, False otherwise
+    """
+    if output_path.is_dir():
+        LOG.error(f"Output path is a directory: {output_path}")
+        return False
+
+    # Create parent directories if they don't exist
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Determine output format
+    format = output_format or output_path.suffix[1:]
+    if output_format and output_format != output_path.suffix[1:]:
+        LOG.warning(
+            f"Output format {output_format} does not match file extension {output_path.suffix[1:]}"
+        )
+
+    if format not in ["json", "csv", "jsonl"]:
+        LOG.error(f"Unsupported output format: {format}")
+        return False
+
+    # Write output file
+    if format == "json":
+        output_path.write_text(_dump_json([item.model_dump() for item in data]))
+    elif format == "csv":
+        if not data:
+            LOG.error("No data to write to CSV")
+            return False
+        with output_path.open("w") as f:
+            writer = csv.writer(f)
+            keys = list(data[0].model_dump().keys())
+            writer.writerow(keys)
+            for item in data:
+                writer.writerow([item.model_dump()[key] for key in keys])
+    elif format == "jsonl":
+        with output_path.open("w") as f:
+            for item in data:
+                f.write(_dump_json(item.model_dump(), do_indent=False) + "\n")
+
+    return True
+
+
+def _print_data_to_console(data: list[Datapoint], output_format: str = "json") -> bool:
+    """
+    Print datapoints to console.
+
+    Args:
+        data: List of datapoints to print
+        output_format: Format to use (json, csv, jsonl)
+
+    Returns:
+        True if successful, False otherwise
+    """
+    if output_format not in ["json", "csv", "jsonl"]:
+        LOG.error(f"Unsupported output format: {output_format}")
+        return False
+
+    if output_format == "json":
+        print(_dump_json([item.model_dump() for item in data]))
+    elif output_format == "csv":
+        if not data:
+            LOG.error("No data to print")
+            return False
+        writer = csv.writer(sys.stdout)
+        keys = list(data[0].model_dump().keys())
+        writer.writerow(keys)
+        for item in data:
+            writer.writerow([item.model_dump()[key] for key in keys])
+    elif output_format == "jsonl":
+        for item in data:
+            print(_dump_json(item.model_dump(), do_indent=False))
+        print()
+
+    return True
+
+
+async def handle_datasets_list(args: Namespace) -> None:
+    """
+    Handle datasets list command.
+
+    Lists all datasets in a formatted table.
+    """
+    client = AsyncLaminarClient(
+        project_api_key=args.project_api_key,
+        base_url=args.base_url,
+        port=args.port,
+    )
+
+    try:
+        datasets = await client.datasets.list_datasets()
+    except Exception as e:
+        LOG.error(f"Failed to list datasets: {e}")
+        return
+    finally:
+        await client.close()
+
+    if not datasets:
+        print("No datasets found.")
+        return
+
+    # Print table header
+    id_width = 36  # UUID length
+    created_at_width = 19  # YYYY-MM-DD HH:MM:SS format
+
+    print(f"\n{'ID':<{id_width}} {'Created At':<{created_at_width}} Name")
+    print(f"{'-' * id_width} {'-' * created_at_width} {'-' * 20}")
+
+    # Print each dataset row
+    for dataset in datasets:
+        created_at_str = dataset.created_at.strftime("%Y-%m-%d %H:%M:%S")
+        print(
+            f"{str(dataset.id):<{id_width}} {created_at_str:<{created_at_width}} {dataset.name}"
+        )
+
+    print(f"\nTotal: {len(datasets)} dataset(s)\n")
+
+
+async def handle_datasets_push(args: Namespace) -> None:
+    """
+    Handle datasets push command.
+
+    Pushes data from file(s) to an existing dataset.
+    """
+    if not args.name and not args.id:
+        LOG.error("Either name or id must be provided")
+        return
+    if args.name and args.id:
+        LOG.error("Only one of name or id must be provided")
+        return
+    identifier = {"name": args.name} if args.name else {"id": args.id}
+    client = AsyncLaminarClient(
+        project_api_key=args.project_api_key,
+        base_url=args.base_url,
+        port=args.port,
+    )
+    data = load_from_paths(parse_paths(args.paths), recursive=args.recursive)
+    if len(data) == 0:
+        LOG.warning("No data to push. Skipping")
+        return
+    try:
+        await client.datasets.push(
+            data,
+            **identifier,
+            batch_size=args.batch_size or DEFAULT_DATASET_PUSH_BATCH_SIZE,
+        )
+        LOG.info(f"Pushed {len(data)} data points to dataset {args.name or args.id}")
+    except Exception as e:
+        LOG.error(f"Failed to push dataset: {e}")
+    finally:
+        await client.close()
+
+
+async def handle_datasets_pull(args: Namespace) -> None:
+    """
+    Handle datasets pull command.
+
+    Pulls data from a dataset and saves it to a file.
+    """
+    if not args.name and not args.id:
+        LOG.error("Either name or id must be provided")
+        return
+    if args.name and args.id:
+        LOG.error("Only one of name or id must be provided")
+        return
+    identifier = {"name": args.name} if args.name else {"id": args.id}
+    client = AsyncLaminarClient(
+        project_api_key=args.project_api_key,
+        base_url=args.base_url,
+        port=args.port,
+    )
+
+    # Pull data from dataset
+    try:
+        result = await _pull_all_data(
+            client=client,
+            identifier=identifier,
+            batch_size=args.batch_size or DEFAULT_DATASET_PULL_BATCH_SIZE,
+            offset=args.offset or 0,
+            limit=args.limit,
+        )
+    except Exception as e:
+        LOG.error(f"Failed to pull dataset: {e}")
+        return
+    finally:
+        await client.close()
+
+    # Write to file or print to console
+    if args.output_path:
+        if not _write_data_to_file(
+            data=result,
+            output_path=Path(args.output_path),
+            output_format=args.output_format,
+        ):
+            return
+    else:
+        if not _print_data_to_console(
+            data=result,
+            output_format=args.output_format or "json",
+        ):
+            return
+
+
+async def handle_datasets_create(args: Namespace) -> None:
+    """
+    Handle datasets create command.
+
+    Creates a dataset from input files, pushes the data to it, and then pulls it back
+    in Laminar format to save to the output file.
+    """
+    client = AsyncLaminarClient(
+        project_api_key=args.project_api_key,
+        base_url=args.base_url,
+        port=args.port,
+    )
+
+    # Load data from input files
+    data = load_from_paths(parse_paths(args.paths), recursive=args.recursive)
+    if len(data) == 0:
+        LOG.warning("No data to push. Skipping")
+        return
+
+    # Push data to create/populate the dataset
+    LOG.info(f"Pushing {len(data)} data points to dataset '{args.name}'...")
+    try:
+        await client.datasets.push(
+            data,
+            name=args.name,
+            batch_size=args.batch_size or DEFAULT_DATASET_PUSH_BATCH_SIZE,
+            create_dataset=True,
+        )
+        LOG.info(
+            f"Successfully pushed {len(data)} data points to dataset '{args.name}'"
+        )
+    except Exception as e:
+        LOG.error(f"Failed to create dataset: {e}")
+        return
+
+    # Pull data back from the dataset
+    LOG.info(f"Pulling data from dataset '{args.name}'...")
+    try:
+        result = await _pull_all_data(
+            client=client,
+            identifier={"name": args.name},
+            batch_size=args.batch_size or DEFAULT_DATASET_PULL_BATCH_SIZE,
+            offset=0,
+            limit=None,
+        )
+    except Exception as e:
+        LOG.error(f"Failed to pull dataset after creation: {e}")
+        return
+    finally:
+        await client.close()
+
+    # Save to output file
+    if not _write_data_to_file(
+        data=result,
+        output_path=Path(args.output_file),
+        output_format=args.output_format,
+    ):
+        return
+
+    LOG.info(
+        f"Successfully created dataset '{args.name}' and saved {len(result)} data points to {args.output_file}"
+    )
+
+
+async def handle_datasets_command(args: Namespace) -> None:
+    """
+    Handle datasets subcommand dispatching.
+
+    Dispatches to the appropriate handler based on the command.
+    """
+    if args.command == "list":
+        await handle_datasets_list(args)
+    elif args.command == "push":
+        await handle_datasets_push(args)
+    elif args.command == "pull":
+        await handle_datasets_pull(args)
+    elif args.command == "create":
+        await handle_datasets_create(args)
+    else:
+        LOG.error(f"Unknown datasets command: {args.command}")
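
_pull_all_data pages through client.datasets.pull in batch_size steps until total_count (or the caller's limit) is reached, then trims any overshoot from the last batch. A minimal sketch of the same pull call used directly, assuming the client and response shapes shown above and a placeholder dataset name:

    # Sketch only: one page pulled directly; AsyncLaminarClient() is
    # assumed to pick up LMNR_PROJECT_API_KEY from the environment.
    import asyncio

    from lmnr import AsyncLaminarClient

    async def main() -> None:
        client = AsyncLaminarClient()
        try:
            page = await client.datasets.pull(name="my-dataset", offset=0, limit=100)
            for item in page.items:  # Datapoint models, per the code above
                print(item.model_dump())
        finally:
            await client.close()

    asyncio.run(main())
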