rosetta-cli 2.0.4__tar.gz → 2.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rosetta_cli-2.0.4/rosetta_cli.egg-info → rosetta_cli-2.0.6}/PKG-INFO +8 -1
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/README.md +7 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/pyproject.toml +1 -1
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/cli.py +19 -1
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/cleanup_command.py +4 -4
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/list_command.py +4 -4
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/parse_command.py +3 -3
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/ims_publisher.py +9 -32
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6/rosetta_cli.egg-info}/PKG-INFO +8 -1
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/MANIFEST.in +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/env.template +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/__init__.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/__main__.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/__init__.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/base_command.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/publish_command.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/verify_command.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/ims_auth.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/ims_config.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/ims_utils.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/ragflow_client.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/services/__init__.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/services/auth_service.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/services/dataset_service.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/services/document_data.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/services/document_service.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/typing_utils.py +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli.egg-info/SOURCES.txt +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli.egg-info/dependency_links.txt +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli.egg-info/entry_points.txt +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli.egg-info/requires.txt +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli.egg-info/top_level.txt +0 -0
- {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rosetta-cli
|
|
3
|
-
Version: 2.0.4
|
|
3
|
+
Version: 2.0.6
|
|
4
4
|
Summary: Rosetta CLI for publishing knowledge base content to RAGFlow
|
|
5
5
|
Author: Igor Solomatov
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -89,6 +89,7 @@ Complete setup instructions are in [docs/QUICKSTART.md](../docs/QUICKSTART.md).
|
|
|
89
89
|
### Installed Usage
|
|
90
90
|
|
|
91
91
|
```bash
|
|
92
|
+
uvx rosetta-cli@latest version
|
|
92
93
|
uvx rosetta-cli@latest verify
|
|
93
94
|
```
|
|
94
95
|
|
|
@@ -105,6 +106,12 @@ venv/bin/rosetta-cli verify
|
|
|
105
106
|
|
|
106
107
|
All commands support `--env <environment>` flag to override the active environment.
|
|
107
108
|
|
|
109
|
+
### Version
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
uvx rosetta-cli@latest version
|
|
113
|
+
```
|
|
114
|
+
|
|
108
115
|
### Publishing Commands
|
|
109
116
|
|
|
110
117
|
#### Publish Knowledge Base Content
|
|
@@ -61,6 +61,7 @@ Complete setup instructions are in [docs/QUICKSTART.md](../docs/QUICKSTART.md).
|
|
|
61
61
|
### Installed Usage
|
|
62
62
|
|
|
63
63
|
```bash
|
|
64
|
+
uvx rosetta-cli@latest version
|
|
64
65
|
uvx rosetta-cli@latest verify
|
|
65
66
|
```
|
|
66
67
|
|
|
@@ -77,6 +78,12 @@ venv/bin/rosetta-cli verify
|
|
|
77
78
|
|
|
78
79
|
All commands support `--env <environment>` flag to override the active environment.
|
|
79
80
|
|
|
81
|
+
### Version
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
uvx rosetta-cli@latest version
|
|
85
|
+
```
|
|
86
|
+
|
|
80
87
|
### Publishing Commands
|
|
81
88
|
|
|
82
89
|
#### Publish Knowledge Base Content
|
|
@@ -30,6 +30,11 @@ COMMAND_REGISTRY: dict[str, CommandClass] = {
|
|
|
30
30
|
}
|
|
31
31
|
|
|
32
32
|
|
|
33
|
+
def _print_version() -> None:
|
|
34
|
+
"""Print the CLI version."""
|
|
35
|
+
print(f"Rosetta Version: {_CLI_VERSION}")
|
|
36
|
+
|
|
37
|
+
|
|
33
38
|
def execute_command(command_name: str, args: CommandArgs, client: RAGFlowClient, config: IMSConfig) -> int:
|
|
34
39
|
"""
|
|
35
40
|
Execute a command by name using the command registry.
|
|
@@ -60,6 +65,9 @@ def main() -> int:
|
|
|
60
65
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
61
66
|
epilog="""
|
|
62
67
|
Examples:
|
|
68
|
+
# Print CLI version
|
|
69
|
+
rosetta-cli version
|
|
70
|
+
|
|
63
71
|
# Publish knowledge base content from a folder (with timing)
|
|
64
72
|
rosetta-cli publish ../instructions
|
|
65
73
|
|
|
@@ -217,6 +225,12 @@ Frontmatter Metadata (publish flow):
|
|
|
217
225
|
default=None,
|
|
218
226
|
help='Explicit path to a .env file'
|
|
219
227
|
)
|
|
228
|
+
|
|
229
|
+
# Version command
|
|
230
|
+
subparsers.add_parser(
|
|
231
|
+
'version',
|
|
232
|
+
help='Print CLI version and exit'
|
|
233
|
+
)
|
|
220
234
|
|
|
221
235
|
# List dataset command
|
|
222
236
|
list_parser = subparsers.add_parser(
|
|
@@ -340,6 +354,10 @@ Frontmatter Metadata (publish flow):
|
|
|
340
354
|
if not args.command:
|
|
341
355
|
parser.print_help()
|
|
342
356
|
return 1
|
|
357
|
+
|
|
358
|
+
if args.command == 'version':
|
|
359
|
+
_print_version()
|
|
360
|
+
return 0
|
|
343
361
|
|
|
344
362
|
try:
|
|
345
363
|
# Load configuration
|
|
@@ -348,7 +366,7 @@ Frontmatter Metadata (publish flow):
|
|
|
348
366
|
# Validate configuration
|
|
349
367
|
config.validate()
|
|
350
368
|
|
|
351
|
-
|
|
369
|
+
_print_version()
|
|
352
370
|
print(f"Rosetta Environment: {config.environment}")
|
|
353
371
|
|
|
354
372
|
# Initialize RAGFlow client
|
|
@@ -19,6 +19,10 @@ class CleanupCommand(BaseCommand):
|
|
|
19
19
|
def execute(self, args: CommandArgs) -> int:
|
|
20
20
|
"""Execute cleanup-dataset command."""
|
|
21
21
|
self._start_timing()
|
|
22
|
+
|
|
23
|
+
# Verify authentication before any dataset auto-detection touches the API.
|
|
24
|
+
from ..services.auth_service import AuthService
|
|
25
|
+
AuthService.verify_or_exit(self.client, self.config)
|
|
22
26
|
|
|
23
27
|
# Resolve dataset name
|
|
24
28
|
dataset_service = DatasetService(self.client, self.config)
|
|
@@ -32,10 +36,6 @@ class CleanupCommand(BaseCommand):
|
|
|
32
36
|
print(f"Environment: {self.config.environment}")
|
|
33
37
|
print(f"RAGFlow Instance: {self.config.base_url}\n")
|
|
34
38
|
|
|
35
|
-
# Verify authentication
|
|
36
|
-
from ..services.auth_service import AuthService
|
|
37
|
-
AuthService.verify_or_exit(self.client, self.config)
|
|
38
|
-
|
|
39
39
|
try:
|
|
40
40
|
# Get dataset
|
|
41
41
|
dataset = self.client.get_dataset(name=dataset_name)
|
|
@@ -15,6 +15,10 @@ class ListCommand(BaseCommand):
|
|
|
15
15
|
def execute(self, args: CommandArgs) -> int:
|
|
16
16
|
"""Execute list-dataset command."""
|
|
17
17
|
self._start_timing()
|
|
18
|
+
|
|
19
|
+
# Verify authentication before any dataset auto-detection touches the API.
|
|
20
|
+
from ..services.auth_service import AuthService
|
|
21
|
+
AuthService.verify_or_exit(self.client, self.config)
|
|
18
22
|
|
|
19
23
|
# Resolve dataset name
|
|
20
24
|
dataset_service = DatasetService(self.client, self.config)
|
|
@@ -30,10 +34,6 @@ class ListCommand(BaseCommand):
|
|
|
30
34
|
print(f"RAGFlow Instance: {self.config.base_url}")
|
|
31
35
|
print()
|
|
32
36
|
|
|
33
|
-
# Verify authentication
|
|
34
|
-
from ..services.auth_service import AuthService
|
|
35
|
-
AuthService.verify_or_exit(self.client, self.config)
|
|
36
|
-
|
|
37
37
|
try:
|
|
38
38
|
# Get dataset
|
|
39
39
|
dataset = self.client.get_dataset(name=dataset_name)
|
|
@@ -27,6 +27,9 @@ class ParseCommand(BaseCommand):
|
|
|
27
27
|
|
|
28
28
|
# CLI flag must override config default for this run.
|
|
29
29
|
self.config.parse_timeout = args.parse_timeout
|
|
30
|
+
|
|
31
|
+
# Verify authentication before any dataset auto-detection touches the API.
|
|
32
|
+
AuthService.verify_or_exit(self.client, self.config)
|
|
30
33
|
|
|
31
34
|
# Resolve dataset name
|
|
32
35
|
dataset_service = DatasetService(self.client, self.config)
|
|
@@ -44,9 +47,6 @@ class ParseCommand(BaseCommand):
|
|
|
44
47
|
print("DRY-RUN MODE - No parsing will be triggered")
|
|
45
48
|
print(f"Environment: {self.config.environment}")
|
|
46
49
|
print(f"RAGFlow Instance: {self.config.base_url}\n")
|
|
47
|
-
|
|
48
|
-
# Verify authentication
|
|
49
|
-
AuthService.verify_or_exit(self.client, self.config)
|
|
50
50
|
print()
|
|
51
51
|
|
|
52
52
|
try:
|
|
@@ -391,7 +391,7 @@ class ContentPublisher:
|
|
|
391
391
|
|
|
392
392
|
|
|
393
393
|
if dry_run:
|
|
394
|
-
print(f"[DRY RUN] Would publish: {file.name}")
|
|
394
|
+
print(f"[DRY RUN] Would publish: {metadata.get('doc_title', metadata.get('original_path', file.name))}")
|
|
395
395
|
print(f" Document ID: {ims_doc_id}")
|
|
396
396
|
print(f" Dataset: {dataset_name}")
|
|
397
397
|
print(f" File type: {'text' if is_text else 'binary'}")
|
|
@@ -665,22 +665,16 @@ class ContentPublisher:
|
|
|
665
665
|
new_name = f"{re.sub(r'\(\d+\)$', '', path.stem)}{path.suffix}"
|
|
666
666
|
return name[: -len(path.name)] + new_name
|
|
667
667
|
|
|
668
|
-
#
|
|
669
|
-
#
|
|
670
|
-
|
|
668
|
+
# Collect all docs to delete into one list: (doc, label)
|
|
669
|
+
# Start with unmanaged: incomplete metadata (ims_doc_id or original_path absent)
|
|
670
|
+
duplicates: list[tuple[DocumentLike, str]] = []
|
|
671
671
|
for doc in all_docs:
|
|
672
672
|
meta = getattr(doc, "meta_fields", {}) or {}
|
|
673
673
|
ims_doc_id = meta.get("ims_doc_id") if isinstance(meta, dict) else getattr(meta, "ims_doc_id", None)
|
|
674
|
-
if
|
|
674
|
+
doc_original_path = meta.get("original_path", "") if isinstance(meta, dict) else getattr(meta, "original_path", "")
|
|
675
|
+
if not ims_doc_id or not doc_original_path:
|
|
675
676
|
doc_name = getattr(doc, "name", "") or doc.id
|
|
676
|
-
|
|
677
|
-
print(f" [DRY RUN] Would delete unmanaged (no metadata): {doc_name}")
|
|
678
|
-
else:
|
|
679
|
-
try:
|
|
680
|
-
dataset.delete_documents([doc.id])
|
|
681
|
-
print(f" Deleted unmanaged (no metadata): {doc_name}")
|
|
682
|
-
except Exception as e:
|
|
683
|
-
print(f" Warning: Failed to delete unmanaged doc '{doc_name}': {e}")
|
|
677
|
+
duplicates.append((doc, doc_name))
|
|
684
678
|
|
|
685
679
|
managed_docs = []
|
|
686
680
|
for doc in all_docs:
|
|
@@ -697,29 +691,12 @@ class ContentPublisher:
|
|
|
697
691
|
if original_path:
|
|
698
692
|
by_path[original_path].append(doc)
|
|
699
693
|
|
|
700
|
-
|
|
694
|
+
# All copies of a duplicated original_path are deleted; publish creates a fresh copy.
|
|
701
695
|
for original_path, docs in by_path.items():
|
|
702
696
|
if len(docs) <= 1:
|
|
703
697
|
continue
|
|
704
|
-
|
|
705
|
-
canonical_ims_doc_id = canonical_ids.get(original_path)
|
|
706
|
-
# Find the authoritative doc (matching local ims_doc_id)
|
|
707
|
-
authoritative: DocumentLike | None = None
|
|
708
|
-
if canonical_ims_doc_id:
|
|
709
|
-
for doc in docs:
|
|
710
|
-
meta = getattr(doc, "meta_fields", {}) or {}
|
|
711
|
-
doc_ims_id = meta.get("ims_doc_id", "") if isinstance(meta, dict) else getattr(meta, "ims_doc_id", "")
|
|
712
|
-
if doc_ims_id == canonical_ims_doc_id:
|
|
713
|
-
authoritative = doc
|
|
714
|
-
break
|
|
715
|
-
|
|
716
|
-
# Fallback: keep most recent (first in desc order)
|
|
717
|
-
if authoritative is None:
|
|
718
|
-
authoritative = docs[0]
|
|
719
|
-
|
|
720
698
|
for doc in docs:
|
|
721
|
-
|
|
722
|
-
duplicates.append((doc, original_path))
|
|
699
|
+
duplicates.append((doc, original_path))
|
|
723
700
|
|
|
724
701
|
# Name duplicates: foo.md + foo(1).md + foo(2).md ...
|
|
725
702
|
name_groups: dict[str, list[DocumentLike]] = defaultdict(list)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rosetta-cli
|
|
3
|
-
Version: 2.0.4
|
|
3
|
+
Version: 2.0.6
|
|
4
4
|
Summary: Rosetta CLI for publishing knowledge base content to RAGFlow
|
|
5
5
|
Author: Igor Solomatov
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -89,6 +89,7 @@ Complete setup instructions are in [docs/QUICKSTART.md](../docs/QUICKSTART.md).
|
|
|
89
89
|
### Installed Usage
|
|
90
90
|
|
|
91
91
|
```bash
|
|
92
|
+
uvx rosetta-cli@latest version
|
|
92
93
|
uvx rosetta-cli@latest verify
|
|
93
94
|
```
|
|
94
95
|
|
|
@@ -105,6 +106,12 @@ venv/bin/rosetta-cli verify
|
|
|
105
106
|
|
|
106
107
|
All commands support `--env <environment>` flag to override the active environment.
|
|
107
108
|
|
|
109
|
+
### Version
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
uvx rosetta-cli@latest version
|
|
113
|
+
```
|
|
114
|
+
|
|
108
115
|
### Publishing Commands
|
|
109
116
|
|
|
110
117
|
#### Publish Knowledge Base Content
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|