rosetta-cli 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,317 @@
1
+ """
2
+ Configuration management for IMS Publishing
3
+
4
+ Handles environment variables and configuration loading for RAGFlow-based
5
+ IMS publishing tools.
6
+ """
7
+
8
+ import os
9
+ from dataclasses import dataclass
10
+ from pathlib import Path
11
+
12
+ from dotenv import load_dotenv
13
+
14
+ from .typing_utils import JsonDict
15
+
16
# Name of the environment variable that can point at an explicit .env file.
# find_env_file() reads it first, and when it is set no directory search is done.
+ ENV_FILE_ENV_VAR = "ROSETTA_CLI_ENV_FILE"
17
+
18
+
19
+ def _candidate_env_names(env_name: str | None) -> list[str]:
20
+ names: list[str] = []
21
+ if env_name:
22
+ names.append(f".env.{env_name}")
23
+ names.append(".env")
24
+ return names
25
+
26
+
27
def find_env_file(env_name: str | None = None) -> Path | None:
    """
    .env file discovery.

    If the `ROSETTA_CLI_ENV_FILE` environment variable is set, only that path
    is considered (with `~` expanded) and no directory search happens.
    Otherwise the current working directory and then each of its parents are
    searched; within each directory `.env.<env_name>` is tried before `.env`.

    Directories are never returned: a directory named `.env` (a common
    virtualenv name) is skipped so the search can continue to a real file.

    Args:
        env_name: Environment name (e.g., "remote", "dev"). If provided,
                 looks for .env.<env_name> first, then falls back to .env

    Returns:
        Path to .env file if found, None otherwise

    Examples (illustrative; the result depends on the working directory):
        find_env_file("remote")  ->  /project/.env.remote
        find_env_file()          ->  /project/.env
    """

    def _is_loadable(candidate: Path) -> bool:
        # exists() alone is also true for directories, which cannot be
        # loaded as an env file.
        return candidate.exists() and not candidate.is_dir()

    explicit_env_file = os.getenv(ENV_FILE_ENV_VAR)
    if explicit_env_file:
        # The explicit override wins outright; a bad override yields None
        # instead of silently falling back to discovery.
        env_path = Path(explicit_env_file).expanduser()
        return env_path if _is_loadable(env_path) else None

    # The candidate names do not depend on the directory, so compute them once.
    filenames = _candidate_env_names(env_name)
    current = Path.cwd().resolve()
    for search_dir in (current, *current.parents):
        for env_filename in filenames:
            env_path = search_dir / env_filename
            if _is_loadable(env_path):
                return env_path

    return None
61
+
62
+
63
@dataclass
class IMSConfig:
    """
    RAGFlow configuration for IMS publishing.

    Environment Variables:
        RAGFLOW_BASE_URL: RAGFlow instance URL (default: http://ragflow.local)
        RAGFLOW_API_KEY: API key for authentication (default: empty string;
            validate() rejects an empty key)
        RAGFLOW_DATASET_DEFAULT: Default dataset name (default: "aia")
        RAGFLOW_DATASET_TEMPLATE: Template for dataset names (default: "aia-{release}")
        RAGFLOW_EMBEDDING_MODEL: Embedding model (format: model_name@provider)
        RAGFLOW_CHUNK_METHOD: Chunking method (default: "naive")
        RAGFLOW_CHUNK_TOKEN_NUM: Chunk size in tokens (default: 512)
        RAGFLOW_DELIMITER: Delimiter for splitting chunks (default: "\\n");
            backslash escapes such as "\\n" or "\\t" are decoded
        RAGFLOW_AUTO_KEYWORDS: Auto-generate keywords per chunk (default: 0)
        RAGFLOW_AUTO_QUESTIONS: Auto-generate questions per chunk (default: 0)
        RAGFLOW_PAGE_SIZE: Page size for listing operations (default: 1000)
        RAGFLOW_PARSE_TIMEOUT: Timeout for parsing operations in seconds (default: 300)
        ENVIRONMENT: Environment name (default: "local")

    The chunk token/delimiter/auto-* variables are only read when the chunk
    method is "naive"; for any other method ``parser_config`` stays None.

    Examples:
        >>> config = IMSConfig.from_env(".env")
        >>> print(config.base_url)
        http://ragflow.local

        >>> config = IMSConfig.from_env_vars()
        >>> client = RAGFlowClient(config.api_key, config.base_url)
    """

    base_url: str
    api_key: str
    dataset_default: str = "aia"
    dataset_template: str = "aia-{release}"
    embedding_model: str | None = None
    chunk_method: str = "naive"
    parser_config: JsonDict | None = None
    environment: str = "local"
    page_size: int = 1000
    parse_timeout: int = 300

    @staticmethod
    def _env_int(name: str, default: int) -> int:
        """
        Read an integer setting from the environment.

        A variable that is unset or blank (e.g. ``NAME=`` in a .env file)
        yields ``default``.

        Raises:
            ValueError: If the variable is set to a non-integer value. The
                message names the offending variable.
        """
        raw = os.getenv(name)
        if raw is None or not raw.strip():
            return default
        try:
            return int(raw)
        except ValueError as err:
            raise ValueError(f"{name} must be an integer, got: {raw!r}") from err

    @staticmethod
    def _decode_delimiter(raw: str) -> str:
        """
        Turn backslash escapes in ``raw`` (``\\n``, ``\\t``, ...) into the
        characters they denote while leaving non-ASCII characters intact.

        Raises:
            ValueError: If ``raw`` contains a malformed escape sequence.
        """
        try:
            # Encoding as UTF-8 before "unicode_escape" would reinterpret every
            # multi-byte character as Latin-1 and corrupt it. Latin-1 with
            # "backslashreplace" keeps code points <= 0xFF as single bytes and
            # writes the rest as \uXXXX escapes that the decoder restores.
            return raw.encode("latin-1", "backslashreplace").decode("unicode_escape")
        except UnicodeDecodeError as err:
            raise ValueError(
                f"RAGFLOW_DELIMITER contains an invalid escape sequence: {raw!r}"
            ) from err

    def _masked_api_key(self) -> str:
        """
        Return a display-safe form of the API key.

        Keys longer than 10 characters show their first 10 characters followed
        by "..."; shorter keys are hidden completely as "***".
        """
        return f"{self.api_key[:10]}..." if len(self.api_key) > 10 else "***"

    @classmethod
    def from_env(
        cls,
        env_file: str | None = None,
        environment: str | None = None
    ) -> "IMSConfig":
        """
        Load configuration from .env file.

        Supports both explicit file paths and automatic discovery. The file is
        loaded into the process environment with ``load_dotenv`` and the
        configuration is then built by ``from_env_vars``.

        Args:
            env_file: Explicit path to .env file (``~`` is expanded). If not
                     provided, uses auto-discovery via find_env_file(). The
                     ROSETTA_CLI_ENV_FILE environment variable also works.
            environment: Environment name for auto-discovery (e.g., "remote").
                        Only used for discovery if env_file is not provided
                        (looks for .env.<environment> or .env files); it is
                        always passed on to from_env_vars().

        Returns:
            IMSConfig instance

        Raises:
            FileNotFoundError: If env file cannot be found or is not provided
            ValueError: If a numeric setting is not an integer or the
                delimiter contains an invalid escape sequence. Missing
                credentials are NOT detected here; call validate().

        Examples:
            # Explicit file path
            >>> config = IMSConfig.from_env("ragflow.env")

            # Auto-discovery with environment
            >>> config = IMSConfig.from_env(environment="remote")
            # Searches for: .env.remote, then .env

            # Auto-discovery (looks for .env)
            >>> config = IMSConfig.from_env()
        """
        # Determine which file to load
        env_path: Path
        if env_file:
            # Explicit file path provided; expand "~" the same way the
            # ROSETTA_CLI_ENV_FILE override is expanded during discovery.
            env_path = Path(env_file).expanduser()
            if not env_path.exists():
                raise FileNotFoundError(f"Environment file not found: {env_file}")
        else:
            # Auto-discovery
            discovered_env_path = find_env_file(environment)
            if not discovered_env_path:
                env_hint = f" (tried .env.{environment} and .env)" if environment else " (tried .env)"
                raise FileNotFoundError(
                    f"No .env file found{env_hint}\n"
                    f"Current directory: {Path.cwd()}\n"
                    f"Env override: {os.getenv(ENV_FILE_ENV_VAR, '(not set)')}\n"
                    f"\nPlease create a .env file with RAGFLOW_BASE_URL and RAGFLOW_API_KEY"
                )
            env_path = discovered_env_path

        # Load environment variables from file. NOTE(review): python-dotenv's
        # default (override=False) keeps variables that are already set in the
        # process environment, so the file does not replace them.
        load_dotenv(env_path)

        return cls.from_env_vars(environment=environment)

    @classmethod
    def from_env_vars(cls, environment: str | None = None) -> "IMSConfig":
        """
        Load configuration from environment variables.

        Args:
            environment: Optional explicit environment name (e.g., "local",
                "dev", "remote"). If provided, this value is used and takes
                precedence over the ENVIRONMENT environment variable. If not
                provided, the ENVIRONMENT variable is used, defaulting to
                "local" when unset or empty.

        Returns:
            IMSConfig instance. No credential validation is performed:
            RAGFLOW_API_KEY defaults to an empty string when unset, so call
            validate() to reject incomplete configurations.

        Raises:
            ValueError: If a numeric setting (RAGFLOW_PAGE_SIZE,
                RAGFLOW_PARSE_TIMEOUT, RAGFLOW_CHUNK_TOKEN_NUM,
                RAGFLOW_AUTO_KEYWORDS, RAGFLOW_AUTO_QUESTIONS) is not an
                integer, or RAGFLOW_DELIMITER has an invalid escape sequence.

        Examples:
            >>> os.environ["RAGFLOW_BASE_URL"] = "http://ragflow.local"
            >>> os.environ["RAGFLOW_API_KEY"] = "ragflow-xxx"
            >>> config = IMSConfig.from_env_vars()
        """
        base_url = os.getenv("RAGFLOW_BASE_URL", "http://ragflow.local")
        api_key = os.getenv("RAGFLOW_API_KEY", "")
        dataset_default = os.getenv("RAGFLOW_DATASET_DEFAULT", "aia")
        dataset_template = os.getenv("RAGFLOW_DATASET_TEMPLATE", "aia-{release}")
        # fallback to ENVIRONMENT env var, or default to "local"
        environment = environment or os.getenv("ENVIRONMENT", "local") or "local"

        # Dataset creation settings (an empty embedding model means "not set")
        embedding_model = os.getenv("RAGFLOW_EMBEDDING_MODEL") or None
        chunk_method = os.getenv("RAGFLOW_CHUNK_METHOD", "naive")

        # Pagination and timeout settings
        page_size = cls._env_int("RAGFLOW_PAGE_SIZE", 1000)
        parse_timeout = cls._env_int("RAGFLOW_PARSE_TIMEOUT", 300)

        # Parser configuration for naive chunking
        parser_config: JsonDict | None = None
        if chunk_method == "naive":
            parser_config = {
                "chunk_token_num": cls._env_int("RAGFLOW_CHUNK_TOKEN_NUM", 512),
                # The default is the two characters backslash + "n", decoded
                # to a real newline like any user-supplied escape.
                "delimiter": cls._decode_delimiter(os.getenv("RAGFLOW_DELIMITER", "\\n")),
                "auto_keywords": cls._env_int("RAGFLOW_AUTO_KEYWORDS", 0),
                "auto_questions": cls._env_int("RAGFLOW_AUTO_QUESTIONS", 0),
            }

        return cls(
            base_url=base_url,
            api_key=api_key,
            dataset_default=dataset_default,
            dataset_template=dataset_template,
            embedding_model=embedding_model,
            chunk_method=chunk_method,
            parser_config=parser_config,
            environment=environment,
            page_size=page_size,
            parse_timeout=parse_timeout
        )

    def validate(self) -> bool:
        """
        Validate configuration.

        An API key that does not start with "ragflow-" only produces a printed
        warning; it does not make the configuration invalid.

        Returns:
            True if configuration is valid

        Raises:
            ValueError: If base_url or api_key is empty, or base_url does not
                start with http:// or https://
        """
        if not self.base_url:
            raise ValueError("base_url cannot be empty")

        if not self.api_key:
            raise ValueError("api_key cannot be empty")

        if not self.base_url.startswith(("http://", "https://")):
            raise ValueError(
                f"base_url must start with http:// or https://, got: {self.base_url}"
            )

        if not self.api_key.startswith("ragflow-"):
            # Use the masked form: slicing the first 10 characters would print
            # a short key in full.
            print(
                f"Warning: API key should start with 'ragflow-', got: {self._masked_api_key()}"
            )

        return True

    def save_credentials(self, env_file: str = ".env") -> None:
        """
        Save credentials to .env file.

        Lines of an existing file that do not set one of the managed variables
        (RAGFLOW_BASE_URL, RAGFLOW_API_KEY, RAGFLOW_DATASET_DEFAULT,
        RAGFLOW_DATASET_TEMPLATE, ENVIRONMENT) are preserved. The managed
        block is rewritten at the end of the file; its header comment and the
        blank separator from a previous save are replaced rather than
        duplicated, so repeated saves produce the same file.

        The API key is written in plain text.

        Args:
            env_file: Path to .env file to create/update

        Examples:
            >>> config = IMSConfig(...)
            >>> config.save_credentials("ragflow.env")
        """
        env_path = Path(env_file)
        header = "# RAGFlow Configuration"
        managed_prefixes = tuple(
            f"{key}="
            for key in (
                "RAGFLOW_BASE_URL",
                "RAGFLOW_API_KEY",
                "RAGFLOW_DATASET_DEFAULT",
                "RAGFLOW_DATASET_TEMPLATE",
                "ENVIRONMENT",
            )
        )

        # Read existing content if file exists. UTF-8 is python-dotenv's
        # default read encoding, so the file is written the way it is loaded.
        kept_lines: list[str] = []
        if env_path.exists():
            with open(env_path, 'r', encoding="utf-8") as f:
                for line in f:
                    stripped = line.rstrip()
                    # Drop managed assignments and the header of a previous
                    # save; both are regenerated below.
                    if stripped.startswith(managed_prefixes) or stripped == header:
                        continue
                    kept_lines.append(stripped)

        # Trailing blank lines would otherwise pile up in front of the managed
        # block on every save.
        while kept_lines and not kept_lines[-1]:
            kept_lines.pop()

        # Build new content (blank separator only when something precedes it)
        new_lines = kept_lines + ([""] if kept_lines else []) + [
            header,
            f"RAGFLOW_BASE_URL={self.base_url}",
            f"RAGFLOW_API_KEY={self.api_key}",
            f"RAGFLOW_DATASET_DEFAULT={self.dataset_default}",
            f"RAGFLOW_DATASET_TEMPLATE={self.dataset_template}",
            f"ENVIRONMENT={self.environment}",
        ]

        # Write to file
        with open(env_path, 'w', encoding="utf-8") as f:
            f.write('\n'.join(new_lines) + '\n')

        print(f"Saved configuration to {env_file}")

    def __str__(self) -> str:
        """String representation (masks API key)"""
        masked_key = self._masked_api_key()
        return (
            f"IMSConfig(\n"
            f"  base_url={self.base_url}\n"
            f"  api_key={masked_key}\n"
            f"  dataset_default={self.dataset_default}\n"
            f"  dataset_template={self.dataset_template}\n"
            f"  environment={self.environment}\n"
            f")"
        )