rosetta-cli 2.0.4__tar.gz → 2.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {rosetta_cli-2.0.4/rosetta_cli.egg-info → rosetta_cli-2.0.6}/PKG-INFO +8 -1
  2. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/README.md +7 -0
  3. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/pyproject.toml +1 -1
  4. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/cli.py +19 -1
  5. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/cleanup_command.py +4 -4
  6. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/list_command.py +4 -4
  7. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/parse_command.py +3 -3
  8. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/ims_publisher.py +9 -32
  9. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6/rosetta_cli.egg-info}/PKG-INFO +8 -1
  10. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/MANIFEST.in +0 -0
  11. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/env.template +0 -0
  12. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/__init__.py +0 -0
  13. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/__main__.py +0 -0
  14. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/__init__.py +0 -0
  15. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/base_command.py +0 -0
  16. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/publish_command.py +0 -0
  17. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/commands/verify_command.py +0 -0
  18. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/ims_auth.py +0 -0
  19. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/ims_config.py +0 -0
  20. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/ims_utils.py +0 -0
  21. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/ragflow_client.py +0 -0
  22. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/services/__init__.py +0 -0
  23. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/services/auth_service.py +0 -0
  24. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/services/dataset_service.py +0 -0
  25. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/services/document_data.py +0 -0
  26. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/services/document_service.py +0 -0
  27. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli/typing_utils.py +0 -0
  28. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli.egg-info/SOURCES.txt +0 -0
  29. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli.egg-info/dependency_links.txt +0 -0
  30. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli.egg-info/entry_points.txt +0 -0
  31. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli.egg-info/requires.txt +0 -0
  32. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/rosetta_cli.egg-info/top_level.txt +0 -0
  33. {rosetta_cli-2.0.4 → rosetta_cli-2.0.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rosetta-cli
3
- Version: 2.0.4
3
+ Version: 2.0.6
4
4
  Summary: Rosetta CLI for publishing knowledge base content to RAGFlow
5
5
  Author: Igor Solomatov
6
6
  License-Expression: Apache-2.0
@@ -89,6 +89,7 @@ Complete setup instructions are in [docs/QUICKSTART.md](../docs/QUICKSTART.md).
89
89
  ### Installed Usage
90
90
 
91
91
  ```bash
92
+ uvx rosetta-cli@latest version
92
93
  uvx rosetta-cli@latest verify
93
94
  ```
94
95
 
@@ -105,6 +106,12 @@ venv/bin/rosetta-cli verify
105
106
 
106
107
  All commands support `--env <environment>` flag to override the active environment.
107
108
 
109
+ ### Version
110
+
111
+ ```bash
112
+ uvx rosetta-cli@latest version
113
+ ```
114
+
108
115
  ### Publishing Commands
109
116
 
110
117
  #### Publish Knowledge Base Content
@@ -61,6 +61,7 @@ Complete setup instructions are in [docs/QUICKSTART.md](../docs/QUICKSTART.md).
61
61
  ### Installed Usage
62
62
 
63
63
  ```bash
64
+ uvx rosetta-cli@latest version
64
65
  uvx rosetta-cli@latest verify
65
66
  ```
66
67
 
@@ -77,6 +78,12 @@ venv/bin/rosetta-cli verify
77
78
 
78
79
  All commands support `--env <environment>` flag to override the active environment.
79
80
 
81
+ ### Version
82
+
83
+ ```bash
84
+ uvx rosetta-cli@latest version
85
+ ```
86
+
80
87
  ### Publishing Commands
81
88
 
82
89
  #### Publish Knowledge Base Content
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rosetta-cli"
7
- version = "2.0.4"
7
+ version = "2.0.6"
8
8
  description = "Rosetta CLI for publishing knowledge base content to RAGFlow"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.12"
@@ -30,6 +30,11 @@ COMMAND_REGISTRY: dict[str, CommandClass] = {
30
30
  }
31
31
 
32
32
 
33
+ def _print_version() -> None:
34
+ """Print the CLI version."""
35
+ print(f"Rosetta Version: {_CLI_VERSION}")
36
+
37
+
33
38
  def execute_command(command_name: str, args: CommandArgs, client: RAGFlowClient, config: IMSConfig) -> int:
34
39
  """
35
40
  Execute a command by name using the command registry.
@@ -60,6 +65,9 @@ def main() -> int:
60
65
  formatter_class=argparse.RawDescriptionHelpFormatter,
61
66
  epilog="""
62
67
  Examples:
68
+ # Print CLI version
69
+ rosetta-cli version
70
+
63
71
  # Publish knowledge base content from a folder (with timing)
64
72
  rosetta-cli publish ../instructions
65
73
 
@@ -217,6 +225,12 @@ Frontmatter Metadata (publish flow):
217
225
  default=None,
218
226
  help='Explicit path to a .env file'
219
227
  )
228
+
229
+ # Version command
230
+ subparsers.add_parser(
231
+ 'version',
232
+ help='Print CLI version and exit'
233
+ )
220
234
 
221
235
  # List dataset command
222
236
  list_parser = subparsers.add_parser(
@@ -340,6 +354,10 @@ Frontmatter Metadata (publish flow):
340
354
  if not args.command:
341
355
  parser.print_help()
342
356
  return 1
357
+
358
+ if args.command == 'version':
359
+ _print_version()
360
+ return 0
343
361
 
344
362
  try:
345
363
  # Load configuration
@@ -348,7 +366,7 @@ Frontmatter Metadata (publish flow):
348
366
  # Validate configuration
349
367
  config.validate()
350
368
 
351
- print(f"Rosetta Version: {_CLI_VERSION}")
369
+ _print_version()
352
370
  print(f"Rosetta Environment: {config.environment}")
353
371
 
354
372
  # Initialize RAGFlow client
@@ -19,6 +19,10 @@ class CleanupCommand(BaseCommand):
19
19
  def execute(self, args: CommandArgs) -> int:
20
20
  """Execute cleanup-dataset command."""
21
21
  self._start_timing()
22
+
23
+ # Verify authentication before any dataset auto-detection touches the API.
24
+ from ..services.auth_service import AuthService
25
+ AuthService.verify_or_exit(self.client, self.config)
22
26
 
23
27
  # Resolve dataset name
24
28
  dataset_service = DatasetService(self.client, self.config)
@@ -32,10 +36,6 @@ class CleanupCommand(BaseCommand):
32
36
  print(f"Environment: {self.config.environment}")
33
37
  print(f"RAGFlow Instance: {self.config.base_url}\n")
34
38
 
35
- # Verify authentication
36
- from ..services.auth_service import AuthService
37
- AuthService.verify_or_exit(self.client, self.config)
38
-
39
39
  try:
40
40
  # Get dataset
41
41
  dataset = self.client.get_dataset(name=dataset_name)
@@ -15,6 +15,10 @@ class ListCommand(BaseCommand):
15
15
  def execute(self, args: CommandArgs) -> int:
16
16
  """Execute list-dataset command."""
17
17
  self._start_timing()
18
+
19
+ # Verify authentication before any dataset auto-detection touches the API.
20
+ from ..services.auth_service import AuthService
21
+ AuthService.verify_or_exit(self.client, self.config)
18
22
 
19
23
  # Resolve dataset name
20
24
  dataset_service = DatasetService(self.client, self.config)
@@ -30,10 +34,6 @@ class ListCommand(BaseCommand):
30
34
  print(f"RAGFlow Instance: {self.config.base_url}")
31
35
  print()
32
36
 
33
- # Verify authentication
34
- from ..services.auth_service import AuthService
35
- AuthService.verify_or_exit(self.client, self.config)
36
-
37
37
  try:
38
38
  # Get dataset
39
39
  dataset = self.client.get_dataset(name=dataset_name)
@@ -27,6 +27,9 @@ class ParseCommand(BaseCommand):
27
27
 
28
28
  # CLI flag must override config default for this run.
29
29
  self.config.parse_timeout = args.parse_timeout
30
+
31
+ # Verify authentication before any dataset auto-detection touches the API.
32
+ AuthService.verify_or_exit(self.client, self.config)
30
33
 
31
34
  # Resolve dataset name
32
35
  dataset_service = DatasetService(self.client, self.config)
@@ -44,9 +47,6 @@ class ParseCommand(BaseCommand):
44
47
  print("DRY-RUN MODE - No parsing will be triggered")
45
48
  print(f"Environment: {self.config.environment}")
46
49
  print(f"RAGFlow Instance: {self.config.base_url}\n")
47
-
48
- # Verify authentication
49
- AuthService.verify_or_exit(self.client, self.config)
50
50
  print()
51
51
 
52
52
  try:
@@ -391,7 +391,7 @@ class ContentPublisher:
391
391
 
392
392
 
393
393
  if dry_run:
394
- print(f"[DRY RUN] Would publish: {file.name}")
394
+ print(f"[DRY RUN] Would publish: {metadata.get('doc_title', metadata.get('original_path', file.name))}")
395
395
  print(f" Document ID: {ims_doc_id}")
396
396
  print(f" Dataset: {dataset_name}")
397
397
  print(f" File type: {'text' if is_text else 'binary'}")
@@ -665,22 +665,16 @@ class ContentPublisher:
665
665
  new_name = f"{re.sub(r'\(\d+\)$', '', path.stem)}{path.suffix}"
666
666
  return name[: -len(path.name)] + new_name
667
667
 
668
- # Delete docs with no metadata (ims_doc_id absent) — these are RAGFlow
669
- # auto-renamed leftovers (e.g. SKILL(1).md) from uploads where metadata
670
- # was never written. They are unrecoverable and must be cleaned up first.
668
+ # Collect all docs to delete into one list: (doc, label)
669
+ # Start with unmanaged: incomplete metadata (ims_doc_id or original_path absent)
670
+ duplicates: list[tuple[DocumentLike, str]] = []
671
671
  for doc in all_docs:
672
672
  meta = getattr(doc, "meta_fields", {}) or {}
673
673
  ims_doc_id = meta.get("ims_doc_id") if isinstance(meta, dict) else getattr(meta, "ims_doc_id", None)
674
- if not ims_doc_id:
674
+ doc_original_path = meta.get("original_path", "") if isinstance(meta, dict) else getattr(meta, "original_path", "")
675
+ if not ims_doc_id or not doc_original_path:
675
676
  doc_name = getattr(doc, "name", "") or doc.id
676
- if dry_run:
677
- print(f" [DRY RUN] Would delete unmanaged (no metadata): {doc_name}")
678
- else:
679
- try:
680
- dataset.delete_documents([doc.id])
681
- print(f" Deleted unmanaged (no metadata): {doc_name}")
682
- except Exception as e:
683
- print(f" Warning: Failed to delete unmanaged doc '{doc_name}': {e}")
677
+ duplicates.append((doc, doc_name))
684
678
 
685
679
  managed_docs = []
686
680
  for doc in all_docs:
@@ -697,29 +691,12 @@ class ContentPublisher:
697
691
  if original_path:
698
692
  by_path[original_path].append(doc)
699
693
 
700
- duplicates = [] # (doc, original_path) to delete
694
+ # All copies of a duplicated original_path are deleted; publish creates a fresh copy.
701
695
  for original_path, docs in by_path.items():
702
696
  if len(docs) <= 1:
703
697
  continue
704
-
705
- canonical_ims_doc_id = canonical_ids.get(original_path)
706
- # Find the authoritative doc (matching local ims_doc_id)
707
- authoritative: DocumentLike | None = None
708
- if canonical_ims_doc_id:
709
- for doc in docs:
710
- meta = getattr(doc, "meta_fields", {}) or {}
711
- doc_ims_id = meta.get("ims_doc_id", "") if isinstance(meta, dict) else getattr(meta, "ims_doc_id", "")
712
- if doc_ims_id == canonical_ims_doc_id:
713
- authoritative = doc
714
- break
715
-
716
- # Fallback: keep most recent (first in desc order)
717
- if authoritative is None:
718
- authoritative = docs[0]
719
-
720
698
  for doc in docs:
721
- if doc.id != authoritative.id:
722
- duplicates.append((doc, original_path))
699
+ duplicates.append((doc, original_path))
723
700
 
724
701
  # Name duplicates: foo.md + foo(1).md + foo(2).md ...
725
702
  name_groups: dict[str, list[DocumentLike]] = defaultdict(list)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rosetta-cli
3
- Version: 2.0.4
3
+ Version: 2.0.6
4
4
  Summary: Rosetta CLI for publishing knowledge base content to RAGFlow
5
5
  Author: Igor Solomatov
6
6
  License-Expression: Apache-2.0
@@ -89,6 +89,7 @@ Complete setup instructions are in [docs/QUICKSTART.md](../docs/QUICKSTART.md).
89
89
  ### Installed Usage
90
90
 
91
91
  ```bash
92
+ uvx rosetta-cli@latest version
92
93
  uvx rosetta-cli@latest verify
93
94
  ```
94
95
 
@@ -105,6 +106,12 @@ venv/bin/rosetta-cli verify
105
106
 
106
107
  All commands support `--env <environment>` flag to override the active environment.
107
108
 
109
+ ### Version
110
+
111
+ ```bash
112
+ uvx rosetta-cli@latest version
113
+ ```
114
+
108
115
  ### Publishing Commands
109
116
 
110
117
  #### Publish Knowledge Base Content
File without changes
File without changes
File without changes