lmnr 0.6.16__py3-none-any.whl → 0.7.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lmnr/__init__.py +6 -15
- lmnr/cli/__init__.py +270 -0
- lmnr/cli/datasets.py +371 -0
- lmnr/{cli.py → cli/evals.py} +20 -102
- lmnr/cli/rules.py +42 -0
- lmnr/opentelemetry_lib/__init__.py +9 -2
- lmnr/opentelemetry_lib/decorators/__init__.py +274 -168
- lmnr/opentelemetry_lib/litellm/__init__.py +352 -38
- lmnr/opentelemetry_lib/litellm/utils.py +82 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py +849 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/config.py +13 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_emitter.py +211 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/span_utils.py +401 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/streaming.py +425 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/utils.py +332 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/__init__.py +451 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/proxy.py +144 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_agent/__init__.py +100 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/__init__.py +476 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/utils.py +12 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +191 -129
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/schema_utils.py +26 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/utils.py +126 -41
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py +488 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/config.py +8 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_emitter.py +143 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/span_utils.py +229 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/utils.py +92 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/__init__.py +381 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/utils.py +36 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py +16 -16
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/__init__.py +61 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +472 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +1185 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +305 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/config.py +16 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +312 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_emitter.py +100 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +68 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/utils.py +197 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v0/__init__.py +176 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/__init__.py +368 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +325 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +135 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +786 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openhands_ai/__init__.py +388 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/opentelemetry/__init__.py +69 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/skyvern/__init__.py +59 -61
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/threading/__init__.py +197 -0
- lmnr/opentelemetry_lib/tracing/__init__.py +119 -18
- lmnr/opentelemetry_lib/tracing/_instrument_initializers.py +124 -25
- lmnr/opentelemetry_lib/tracing/attributes.py +4 -0
- lmnr/opentelemetry_lib/tracing/context.py +200 -0
- lmnr/opentelemetry_lib/tracing/exporter.py +109 -15
- lmnr/opentelemetry_lib/tracing/instruments.py +22 -5
- lmnr/opentelemetry_lib/tracing/processor.py +128 -30
- lmnr/opentelemetry_lib/tracing/span.py +398 -0
- lmnr/opentelemetry_lib/tracing/tracer.py +40 -1
- lmnr/opentelemetry_lib/tracing/utils.py +62 -0
- lmnr/opentelemetry_lib/utils/package_check.py +9 -0
- lmnr/opentelemetry_lib/utils/wrappers.py +11 -0
- lmnr/sdk/browser/background_send_events.py +158 -0
- lmnr/sdk/browser/browser_use_cdp_otel.py +100 -0
- lmnr/sdk/browser/browser_use_otel.py +12 -12
- lmnr/sdk/browser/bubus_otel.py +71 -0
- lmnr/sdk/browser/cdp_utils.py +518 -0
- lmnr/sdk/browser/inject_script.js +514 -0
- lmnr/sdk/browser/patchright_otel.py +18 -44
- lmnr/sdk/browser/playwright_otel.py +104 -187
- lmnr/sdk/browser/pw_utils.py +249 -210
- lmnr/sdk/browser/recorder/record.umd.min.cjs +84 -0
- lmnr/sdk/browser/utils.py +1 -1
- lmnr/sdk/client/asynchronous/async_client.py +47 -15
- lmnr/sdk/client/asynchronous/resources/__init__.py +2 -7
- lmnr/sdk/client/asynchronous/resources/browser_events.py +1 -0
- lmnr/sdk/client/asynchronous/resources/datasets.py +131 -0
- lmnr/sdk/client/asynchronous/resources/evals.py +122 -18
- lmnr/sdk/client/asynchronous/resources/evaluators.py +85 -0
- lmnr/sdk/client/asynchronous/resources/tags.py +4 -10
- lmnr/sdk/client/synchronous/resources/__init__.py +2 -2
- lmnr/sdk/client/synchronous/resources/datasets.py +131 -0
- lmnr/sdk/client/synchronous/resources/evals.py +83 -17
- lmnr/sdk/client/synchronous/resources/evaluators.py +85 -0
- lmnr/sdk/client/synchronous/resources/tags.py +4 -10
- lmnr/sdk/client/synchronous/sync_client.py +47 -15
- lmnr/sdk/datasets/__init__.py +94 -0
- lmnr/sdk/datasets/file_utils.py +91 -0
- lmnr/sdk/decorators.py +103 -23
- lmnr/sdk/evaluations.py +122 -33
- lmnr/sdk/laminar.py +816 -333
- lmnr/sdk/log.py +7 -2
- lmnr/sdk/types.py +124 -143
- lmnr/sdk/utils.py +115 -2
- lmnr/version.py +1 -1
- {lmnr-0.6.16.dist-info → lmnr-0.7.26.dist-info}/METADATA +71 -78
- lmnr-0.7.26.dist-info/RECORD +116 -0
- lmnr-0.7.26.dist-info/WHEEL +4 -0
- lmnr-0.7.26.dist-info/entry_points.txt +3 -0
- lmnr/opentelemetry_lib/tracing/context_properties.py +0 -65
- lmnr/sdk/browser/rrweb/rrweb.umd.min.cjs +0 -98
- lmnr/sdk/client/asynchronous/resources/agent.py +0 -329
- lmnr/sdk/client/synchronous/resources/agent.py +0 -323
- lmnr/sdk/datasets.py +0 -60
- lmnr-0.6.16.dist-info/LICENSE +0 -75
- lmnr-0.6.16.dist-info/RECORD +0 -61
- lmnr-0.6.16.dist-info/WHEEL +0 -4
- lmnr-0.6.16.dist-info/entry_points.txt +0 -3
lmnr/__init__.py
CHANGED
@@ -3,29 +3,21 @@ from .sdk.client.asynchronous.async_client import AsyncLaminarClient
 from .sdk.datasets import EvaluationDataset, LaminarDataset
 from .sdk.evaluations import evaluate
 from .sdk.laminar import Laminar
-from .sdk.types import (
-    AgentOutput,
-    FinalOutputChunkContent,
-    HumanEvaluator,
-    RunAgentResponseChunk,
-    StepChunkContent,
-    TracingLevel,
-)
+from .sdk.types import SessionRecordingOptions, MaskInputOptions
+from .sdk.types import HumanEvaluator
 from .sdk.decorators import observe
 from .sdk.types import LaminarSpanContext
 from .opentelemetry_lib.litellm import LaminarLiteLLMCallback
 from .opentelemetry_lib.tracing.attributes import Attributes
 from .opentelemetry_lib.tracing.instruments import Instruments
 from .opentelemetry_lib.tracing.processor import LaminarSpanProcessor
+from .opentelemetry_lib.tracing.span import LaminarSpan
 from .opentelemetry_lib.tracing.tracer import get_laminar_tracer_provider, get_tracer
-from opentelemetry.trace import use_span
 
 __all__ = [
-    "AgentOutput",
     "AsyncLaminarClient",
     "Attributes",
     "EvaluationDataset",
-    "FinalOutputChunkContent",
     "HumanEvaluator",
     "Instruments",
     "Laminar",
@@ -34,12 +26,11 @@ __all__ = [
     "LaminarLiteLLMCallback",
     "LaminarSpanContext",
     "LaminarSpanProcessor",
-    "RunAgentResponseChunk",
-    "StepChunkContent",
-    "TracingLevel",
+    "LaminarSpan",
     "get_laminar_tracer_provider",
     "get_tracer",
     "evaluate",
     "observe",
-    "use_span",
+    "SessionRecordingOptions",
+    "MaskInputOptions",
 ]
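
In short, the public API in lmnr/__init__.py drops the agent-streaming exports (AgentOutput, RunAgentResponseChunk, StepChunkContent, FinalOutputChunkContent, TracingLevel, use_span) and adds SessionRecordingOptions, MaskInputOptions, and LaminarSpan. A minimal usage sketch against the new __all__; the constructor fields of SessionRecordingOptions and MaskInputOptions are not shown in this hunk, so only the imports plus the long-standing Laminar.initialize/observe calls appear here:

from lmnr import Laminar, LaminarSpan, MaskInputOptions, SessionRecordingOptions, observe

# Existing API surface; the keyword (if any) that accepts SessionRecordingOptions
# is defined elsewhere in this release and is not shown in this hunk.
Laminar.initialize(project_api_key="lmnr-project-api-key")  # placeholder key

@observe()
def handler(query: str) -> str:
    return query.upper()
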
lmnr/cli/__init__.py
ADDED
@@ -0,0 +1,270 @@
from argparse import ArgumentParser, _SubParsersAction
import asyncio

from lmnr.cli.datasets import handle_datasets_command
from lmnr.cli.evals import run_evaluation
from lmnr.cli.rules import add_cursor_rules
from lmnr.sdk.log import get_default_logger
from lmnr.sdk.utils import from_env

LOG = get_default_logger(__name__)
EVAL_DIR = "evals"
DEFAULT_DATASET_PULL_BATCH_SIZE = 100
DEFAULT_DATASET_PUSH_BATCH_SIZE = 100


def setup_eval_parser(subparsers: _SubParsersAction) -> None:
    """Setup the eval subcommand parser."""
    parser_eval = subparsers.add_parser(
        "eval",
        description="Run an evaluation",
        help="Run an evaluation",
    )
    parser_eval.add_argument(
        "file",
        nargs="*",
        help="Files or a file containing the evaluation to run. "
        + "If no file name is provided, all evaluation files in the `evals` directory are run as long "
        + "as they match *_eval.py or eval_*.py",
        default=[],
    )
    parser_eval.add_argument(
        "--continue-on-error",
        action="store_true",
        default=False,
        help="Continue execution upon errors",
    )
    parser_eval.add_argument(
        "--output-file",
        help="Output file to write the results to. Outputs are written in JSON format.",
        nargs="?",
    )


def setup_add_cursor_rules_parser(subparsers: _SubParsersAction) -> None:
    """Setup the add-cursor-rules subcommand parser."""
    subparsers.add_parser(
        "add-cursor-rules",
        description="Download laminar.mdc file and add it to .cursor/rules",
        help="Download laminar.mdc file and add it to .cursor/rules",
    )


def setup_laminar_args(parser: ArgumentParser) -> None:
    """Setup the laminar arguments parser."""
    parser.add_argument(
        "--project-api-key",
        help="[Optional] Project API key to use for the command. "
        + "If no project API key is provided, the project API key will be read "
        + "from the environment variable LMNR_PROJECT_API_KEY.",
        default=from_env("LMNR_PROJECT_API_KEY"),
    )
    parser.add_argument(
        "--base-url",
        help="[Optional] Base URL to use for the command. "
        + "If no base URL is provided, the base URL will be read from the "
        + "'LMNR_BASE_URL' environment variable or we default to 'https://api.lmnr.ai'.",
        default=from_env("LMNR_BASE_URL") or "https://api.lmnr.ai",
    )
    parser.add_argument(
        "--port",
        help="[Optional] Port to use for the command. "
        + "If no port is provided, the port defaults to '443'.",
        type=int,
    )


def setup_datasets_list_parser(subparsers: _SubParsersAction) -> None:
    """Setup the datasets list subcommand parser."""
    subparsers.add_parser(
        "list",
        description="List datasets",
        help="List datasets",
    )


def setup_datasets_push_parser(subparsers: _SubParsersAction) -> None:
    """Setup the datasets push subcommand parser."""
    parser_datasets_push: ArgumentParser = subparsers.add_parser(
        "push",
        description="Push datapoints to an existing dataset",
        help="Push datapoints to an existing dataset",
    )
    parser_datasets_push.add_argument(
        "--name",
        "-n",
        help="Name of the dataset to push data to. Exactly one of name or id must be provided.",
        default=None,
    )
    parser_datasets_push.add_argument(
        "--id",
        help="ID of the dataset to push data to. Exactly one of name or id must be provided.",
        default=None,
    )
    parser_datasets_push.add_argument(
        "paths",
        nargs="*",
        help="Paths to the files or directories containing the data to push to the dataset. "
        + "Supported formats: JSON, CSV, JSONL",
    )
    parser_datasets_push.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        default=False,
        help="Recursively read all files in the directories and their subdirectories.",
    )
    parser_datasets_push.add_argument(
        "--batch-size",
        type=int,
        help="Batch size to push data in. If no batch size is provided, "
        + f"data is pushed in batches of '{DEFAULT_DATASET_PUSH_BATCH_SIZE}'.",
        default=DEFAULT_DATASET_PUSH_BATCH_SIZE,
    )


def setup_datasets_pull_parser(subparsers: _SubParsersAction) -> None:
    """Setup the datasets pull subcommand parser."""
    parser_datasets_pull: ArgumentParser = subparsers.add_parser(
        "pull",
        description="Pull data from a dataset",
        help="Pull data from a dataset",
    )
    parser_datasets_pull.add_argument(
        "--name",
        "-n",
        help="Name of the dataset to pull data from",
        default=None,
    )
    parser_datasets_pull.add_argument(
        "--id",
        help="ID of the dataset to pull data from",
        default=None,
    )
    parser_datasets_pull.add_argument(
        "output_path",
        help="Path to the file to save the data to. "
        + "If no path is provided, data is printed to the console in the format "
        + "specified by '--output-format'.",
        nargs="?",
    )
    parser_datasets_pull.add_argument(
        "--output-format",
        choices=["json", "csv", "jsonl"],
        help="Output format to save the data to. "
        + "If no format is provided, it is inferred from the file extension.",
    )
    parser_datasets_pull.add_argument(
        "--batch-size",
        type=int,
        help="Batch size to pull data in. If no batch size is provided, "
        + f"data is pulled in batches of '{DEFAULT_DATASET_PULL_BATCH_SIZE}'.",
        default=DEFAULT_DATASET_PULL_BATCH_SIZE,
    )
    parser_datasets_pull.add_argument(
        "--limit",
        type=int,
        help="Limit the number of data points to pull. "
        + "If no limit is provided, all data points are pulled.",
    )
    parser_datasets_pull.add_argument(
        "--offset",
        type=int,
        help="Offset the number of data points to pull. "
        + "If no offset is provided, data is pulled from the beginning.",
    )


def setup_datasets_create_parser(subparsers: _SubParsersAction) -> None:
    """Setup the datasets create subcommand parser."""
    parser_datasets_create: ArgumentParser = subparsers.add_parser(
        "create",
        description="Create a dataset from input files and download it in Laminar format",
        help="Create a dataset from input files and download it in Laminar format",
    )
    parser_datasets_create.add_argument(
        "name",
        help="Name of the dataset to create",
    )
    parser_datasets_create.add_argument(
        "paths",
        nargs="+",
        help="Paths to the files or directories containing the data to push to the dataset. "
        + "Supported formats: JSON, CSV, JSONL",
    )
    parser_datasets_create.add_argument(
        "-o",
        "--output-file",
        required=True,
        help="Path to the file to save the pulled data to",
    )
    parser_datasets_create.add_argument(
        "--output-format",
        choices=["json", "csv", "jsonl"],
        help="Output format to save the data to. "
        + "If no format is provided, it is inferred from the output file extension.",
    )
    parser_datasets_create.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        default=False,
        help="Recursively read all files in the directories and their subdirectories.",
    )
    parser_datasets_create.add_argument(
        "--batch-size",
        type=int,
        help="Batch size to push/pull data in. If no batch size is provided, "
        + f"data is processed in batches of '{DEFAULT_DATASET_PUSH_BATCH_SIZE}'.",
        default=DEFAULT_DATASET_PUSH_BATCH_SIZE,
    )


def setup_datasets_parser(subparsers: _SubParsersAction) -> None:
    """Setup the datasets subcommand parser and its subcommands."""
    parser_datasets: ArgumentParser = subparsers.add_parser(
        "datasets",
        description="Manage datasets",
        help="Manage datasets",
    )

    setup_laminar_args(parser_datasets)

    parser_datasets_subparsers = parser_datasets.add_subparsers(
        title="command",
        dest="command",
    )

    # Setup all dataset subcommands
    setup_datasets_list_parser(parser_datasets_subparsers)
    setup_datasets_push_parser(parser_datasets_subparsers)
    setup_datasets_pull_parser(parser_datasets_subparsers)
    setup_datasets_create_parser(parser_datasets_subparsers)


def cli() -> None:
    """Main CLI entry point."""
    parser = ArgumentParser(
        prog="lmnr",
        description="CLI for Laminar. "
        + "Call `lmnr [subcommand] --help` for more information on each subcommand.",
    )

    subparsers = parser.add_subparsers(title="subcommands", dest="subcommand")

    # Setup all subcommand parsers
    setup_eval_parser(subparsers)
    setup_add_cursor_rules_parser(subparsers)
    setup_datasets_parser(subparsers)

    # Parse arguments and dispatch to appropriate handler
    parsed = parser.parse_args()

    if parsed.subcommand == "eval":
        asyncio.run(run_evaluation(parsed))
    elif parsed.subcommand == "add-cursor-rules":
        add_cursor_rules()
    elif parsed.subcommand == "datasets":
        asyncio.run(handle_datasets_command(parsed))
    else:
        parser.print_help()
lmnr/cli/datasets.py
ADDED
@@ -0,0 +1,371 @@
from argparse import Namespace
from pathlib import Path
from typing import Any

import csv
import sys

import orjson

from lmnr.sdk.client.asynchronous.async_client import AsyncLaminarClient
from lmnr.sdk.datasets.file_utils import load_from_paths, parse_paths
from lmnr.sdk.log import get_default_logger
from lmnr.sdk.types import Datapoint

LOG = get_default_logger(__name__, verbose=False)
DEFAULT_DATASET_PULL_BATCH_SIZE = 100
DEFAULT_DATASET_PUSH_BATCH_SIZE = 100


def _dump_json(data: Any, do_indent: bool = True) -> str:
    return orjson.dumps(
        data,
        option=(orjson.OPT_INDENT_2 if do_indent else 0)
        | orjson.OPT_SERIALIZE_DATACLASS
        | orjson.OPT_SERIALIZE_UUID
        | orjson.OPT_UTC_Z
        | orjson.OPT_NON_STR_KEYS,
    ).decode()


async def _pull_all_data(
    client: AsyncLaminarClient,
    identifier: dict,
    batch_size: int,
    offset: int = 0,
    limit: int | None = None,
) -> list[Datapoint]:
    """
    Pull all data from a dataset. This function does not close the client.

    Args:
        client: The AsyncLaminarClient instance
        identifier: Dict with either 'name' or 'id' key
        batch_size: Size of batches to pull
        offset: Starting offset for pulling data
        limit: Maximum number of items to pull (None for all)

    Returns:
        List of all pulled datapoints
    """
    has_more = True
    current_offset = offset
    stop_at = offset + limit if limit else None

    result: list[Datapoint] = []
    while has_more and (stop_at is None or current_offset < stop_at):
        data = await client.datasets.pull(
            **identifier,
            offset=current_offset,
            limit=batch_size,
        )
        result.extend(data.items)
        if stop_at is not None and current_offset + batch_size >= stop_at:
            has_more = False
        elif (
            data.total_count is not None
            and current_offset + batch_size >= data.total_count
        ):
            has_more = False
        current_offset += batch_size

    if limit is not None:
        return result[:limit]
    return result


def _write_data_to_file(
    data: list[Datapoint],
    output_path: Path,
    output_format: str | None = None,
) -> bool:
    """
    Write datapoints to a file.

    Args:
        data: List of datapoints to write
        output_path: Path to write the file to
        output_format: Optional format override (json, csv, jsonl)

    Returns:
        True if successful, False otherwise
    """
    if output_path.is_dir():
        LOG.error(f"Output path is a directory: {output_path}")
        return False

    # Create parent directories if they don't exist
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Determine output format
    format = output_format or output_path.suffix[1:]
    if output_format and output_format != output_path.suffix[1:]:
        LOG.warning(
            f"Output format {output_format} does not match file extension {output_path.suffix[1:]}"
        )

    if format not in ["json", "csv", "jsonl"]:
        LOG.error(f"Unsupported output format: {format}")
        return False

    # Write output file
    if format == "json":
        output_path.write_text(_dump_json([item.model_dump() for item in data]))
    elif format == "csv":
        if not data:
            LOG.error("No data to write to CSV")
            return False
        with output_path.open("w") as f:
            writer = csv.writer(f)
            keys = list(data[0].model_dump().keys())
            writer.writerow(keys)
            for item in data:
                writer.writerow([item.model_dump()[key] for key in keys])
    elif format == "jsonl":
        with output_path.open("w") as f:
            for item in data:
                f.write(_dump_json(item.model_dump(), do_indent=False) + "\n")

    return True


def _print_data_to_console(data: list[Datapoint], output_format: str = "json") -> bool:
    """
    Print datapoints to console.

    Args:
        data: List of datapoints to print
        output_format: Format to use (json, csv, jsonl)

    Returns:
        True if successful, False otherwise
    """
    if output_format not in ["json", "csv", "jsonl"]:
        LOG.error(f"Unsupported output format: {output_format}")
        return False

    if output_format == "json":
        print(_dump_json([item.model_dump() for item in data]))
    elif output_format == "csv":
        if not data:
            LOG.error("No data to print")
            return False
        writer = csv.writer(sys.stdout)
        keys = list(data[0].model_dump().keys())
        writer.writerow(keys)
        for item in data:
            writer.writerow([item.model_dump()[key] for key in keys])
    elif output_format == "jsonl":
        for item in data:
            print(_dump_json(item.model_dump(), do_indent=False))
        print()

    return True


async def handle_datasets_list(args: Namespace) -> None:
    """
    Handle datasets list command.

    Lists all datasets in a formatted table.
    """
    client = AsyncLaminarClient(
        project_api_key=args.project_api_key,
        base_url=args.base_url,
        port=args.port,
    )

    try:
        datasets = await client.datasets.list_datasets()
    except Exception as e:
        LOG.error(f"Failed to list datasets: {e}")
        return
    finally:
        await client.close()

    if not datasets:
        print("No datasets found.")
        return

    # Print table header
    id_width = 36  # UUID length
    created_at_width = 19  # YYYY-MM-DD HH:MM:SS format

    print(f"\n{'ID':<{id_width}} {'Created At':<{created_at_width}} Name")
    print(f"{'-' * id_width} {'-' * created_at_width} {'-' * 20}")

    # Print each dataset row
    for dataset in datasets:
        created_at_str = dataset.created_at.strftime("%Y-%m-%d %H:%M:%S")
        print(
            f"{str(dataset.id):<{id_width}} {created_at_str:<{created_at_width}} {dataset.name}"
        )

    print(f"\nTotal: {len(datasets)} dataset(s)\n")


async def handle_datasets_push(args: Namespace) -> None:
    """
    Handle datasets push command.

    Pushes data from file(s) to an existing dataset.
    """
    if not args.name and not args.id:
        LOG.error("Either name or id must be provided")
        return
    if args.name and args.id:
        LOG.error("Only one of name or id must be provided")
        return
    identifier = {"name": args.name} if args.name else {"id": args.id}
    client = AsyncLaminarClient(
        project_api_key=args.project_api_key,
        base_url=args.base_url,
        port=args.port,
    )
    data = load_from_paths(parse_paths(args.paths), recursive=args.recursive)
    if len(data) == 0:
        LOG.warning("No data to push. Skipping")
        return
    try:
        await client.datasets.push(
            data,
            **identifier,
            batch_size=args.batch_size or DEFAULT_DATASET_PUSH_BATCH_SIZE,
        )
        LOG.info(f"Pushed {len(data)} data points to dataset {args.name or args.id}")
    except Exception as e:
        LOG.error(f"Failed to push dataset: {e}")
    finally:
        await client.close()


async def handle_datasets_pull(args: Namespace) -> None:
    """
    Handle datasets pull command.

    Pulls data from a dataset and saves it to a file.
    """
    if not args.name and not args.id:
        LOG.error("Either name or id must be provided")
        return
    if args.name and args.id:
        LOG.error("Only one of name or id must be provided")
        return
    identifier = {"name": args.name} if args.name else {"id": args.id}
    client = AsyncLaminarClient(
        project_api_key=args.project_api_key,
        base_url=args.base_url,
        port=args.port,
    )

    # Pull data from dataset
    try:
        result = await _pull_all_data(
            client=client,
            identifier=identifier,
            batch_size=args.batch_size or DEFAULT_DATASET_PULL_BATCH_SIZE,
            offset=args.offset or 0,
            limit=args.limit,
        )
    except Exception as e:
        LOG.error(f"Failed to pull dataset: {e}")
        return
    finally:
        await client.close()

    # Write to file or print to console
    if args.output_path:
        if not _write_data_to_file(
            data=result,
            output_path=Path(args.output_path),
            output_format=args.output_format,
        ):
            return
    else:
        if not _print_data_to_console(
            data=result,
            output_format=args.output_format or "json",
        ):
            return


async def handle_datasets_create(args: Namespace) -> None:
    """
    Handle datasets create command.

    Creates a dataset from input files, pushes the data to it, and then pulls it back
    in Laminar format to save to the output file.
    """
    client = AsyncLaminarClient(
        project_api_key=args.project_api_key,
        base_url=args.base_url,
        port=args.port,
    )

    # Load data from input files
    data = load_from_paths(parse_paths(args.paths), recursive=args.recursive)
    if len(data) == 0:
        LOG.warning("No data to push. Skipping")
        return

    # Push data to create/populate the dataset
    LOG.info(f"Pushing {len(data)} data points to dataset '{args.name}'...")
    try:
        await client.datasets.push(
            data,
            name=args.name,
            batch_size=args.batch_size or DEFAULT_DATASET_PUSH_BATCH_SIZE,
            create_dataset=True,
        )
        LOG.info(
            f"Successfully pushed {len(data)} data points to dataset '{args.name}'"
        )
    except Exception as e:
        LOG.error(f"Failed to create dataset: {e}")
        return

    # Pull data back from the dataset
    LOG.info(f"Pulling data from dataset '{args.name}'...")
    try:
        result = await _pull_all_data(
            client=client,
            identifier={"name": args.name},
            batch_size=args.batch_size or DEFAULT_DATASET_PULL_BATCH_SIZE,
            offset=0,
            limit=None,
        )
    except Exception as e:
        LOG.error(f"Failed to pull dataset after creation: {e}")
        return
    finally:
        await client.close()

    # Save to output file
    if not _write_data_to_file(
        data=result,
        output_path=Path(args.output_file),
        output_format=args.output_format,
    ):
        return

    LOG.info(
        f"Successfully created dataset '{args.name}' and saved {len(result)} data points to {args.output_file}"
    )


async def handle_datasets_command(args: Namespace) -> None:
    """
    Handle datasets subcommand dispatching.

    Dispatches to the appropriate handler based on the command.
    """
    if args.command == "list":
        await handle_datasets_list(args)
    elif args.command == "push":
        await handle_datasets_push(args)
    elif args.command == "pull":
        await handle_datasets_pull(args)
    elif args.command == "create":
        await handle_datasets_create(args)
    else:
        LOG.error(f"Unknown datasets command: {args.command}")