mdify-cli 2.11.7__py3-none-any.whl → 2.11.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdify/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "2.11.7"
3
+ __version__ = "2.11.9"
mdify/cli.py CHANGED
@@ -41,6 +41,140 @@ OTHER_RUNTIMES_PRIORITY = ("docker", "podman")
41
41
  # Debug mode
42
42
  DEBUG = os.environ.get("MDIFY_DEBUG", "").lower() in ("1", "true", "yes")
43
43
 
44
# Named resource presets for container execution, listed smallest to largest.
# Each preset carries a CPU count ("cpus"), a memory-limit string ("memory"),
# and a short human-readable hint about the workload it targets ("description").
RESOURCE_PROFILES = {
    "minimal": dict(
        cpus=4,
        memory="8g",
        description="Small PDFs, text-only documents",
    ),
    "default": dict(
        cpus=6,
        memory="12g",
        description="Large PDFs, OCR, tables (recommended)",
    ),
    "heavy": dict(
        cpus=8,
        memory="16g",
        description="Batch processing, very large files",
    ),
}
50
+
51
+
52
def _macos_available_memory_gb() -> float:
    """Return free + inactive + speculative memory on macOS, in GB.

    Page counts come from ``vm_stat``. The page size is taken from the
    ``vm_stat`` header when present, so counts and size come from the same
    report; otherwise the ``pagesize`` utility is queried.

    Raises:
        OSError, subprocess.SubprocessError: A helper command is missing or
            exits with a non-zero status.
        ValueError, IndexError: The command output has an unexpected shape.
    """
    vm_stat = subprocess.run(
        ["vm_stat"], capture_output=True, text=True, check=True
    )

    page_size = None
    page_counts = {"Pages free": 0, "Pages inactive": 0, "Pages speculative": 0}
    for line in vm_stat.stdout.splitlines():
        if "page size of" in line:
            # Header is expected to look like
            # "Mach Virtual Memory Statistics: (page size of 16384 bytes)".
            page_size = int(line.split("page size of", 1)[1].split()[0])
            continue
        label, _, value = line.partition(":")
        label = label.strip()
        if label in page_counts:
            # Values are printed with a trailing period, e.g. "12345."
            page_counts[label] = int(value.strip().rstrip("."))

    if page_size is None:
        # No page size in the header: fall back to the `pagesize` utility.
        pagesize = subprocess.run(
            ["pagesize"], capture_output=True, text=True, check=True
        )
        page_size = int(pagesize.stdout.strip())

    # Available memory = free + inactive + speculative pages.
    return sum(page_counts.values()) * page_size / (1024**3)


def _linux_available_memory_gb() -> float:
    """Return ``MemAvailable`` from /proc/meminfo in GB, or -1.0 if absent.

    Raises:
        OSError: /proc/meminfo cannot be opened or read.
        ValueError, IndexError: The ``MemAvailable`` line is malformed.
    """
    with open("/proc/meminfo", encoding="utf-8") as meminfo:
        for line in meminfo:
            if line.startswith("MemAvailable:"):
                # The value is reported in kB.
                return int(line.split()[1]) / (1024**2)
    return -1.0


def get_available_memory_gb() -> float:
    """Get available system memory in GB.

    Supports macOS (via ``vm_stat``) and Linux (via ``/proc/meminfo``). The
    probe is best-effort: on any other platform, or when the platform probe
    fails, the sentinel ``-1.0`` is returned instead of raising.

    Returns:
        Available memory in GB, or -1.0 if unable to determine.
    """
    system = platform.system()
    try:
        if system == "Darwin":  # macOS
            return _macos_available_memory_gb()
        if system == "Linux":
            return _linux_available_memory_gb()
    except (OSError, subprocess.SubprocessError, ValueError, IndexError):
        # Missing tool, unreadable file, or unparsable output: report
        # "unknown" so callers can decide how to proceed.
        pass

    return -1.0
94
+
95
+
96
def parse_memory_string(mem_str: str) -> float:
    """Parse a container memory-limit string (e.g., '12g', '8192m') to GB.

    The input is case-insensitive and surrounding whitespace is ignored.
    Units are binary (1g = 1024m = 1024**2 k = 1024**3 bytes). A value
    without a unit is interpreted as a byte count.

    Args:
        mem_str: Memory string, optionally with a unit
            (b, k, kb, m, mb, g, gb, t, tb).

    Returns:
        Memory in GB.

    Raises:
        ValueError: If the numeric part is missing, not a number, negative,
            or not finite.
    """
    # (suffix, divisor that converts the number to GB). Two-letter suffixes
    # come first so "mb" is not mistaken for a value ending in "b".
    units = (
        ("tb", 1 / 1024),
        ("gb", 1),
        ("mb", 1024),
        ("kb", 1024**2),
        ("t", 1 / 1024),
        ("g", 1),
        ("m", 1024),
        ("k", 1024**2),
        ("b", 1024**3),
    )

    text = mem_str.lower().strip()

    # Default: no unit suffix means the number is a byte count.
    number, divisor = text, 1024**3
    for suffix, unit_divisor in units:
        if text.endswith(suffix):
            number, divisor = text[: -len(suffix)], unit_divisor
            break

    try:
        value = float(number)
    except ValueError:
        raise ValueError(f"Invalid memory format: {text}") from None

    # NaN fails both comparisons, so it is rejected together with negative
    # and infinite values.
    if not 0 <= value < float("inf"):
        raise ValueError(f"Invalid memory format: {text}")

    return value / divisor
117
+
118
+
119
def _pick_smaller_profile(profile_name: str, available_gb: float) -> Optional[str]:
    """Return the largest built-in profile below *profile_name* that fits.

    Walks heavy -> default -> minimal and stops at the first profile whose
    memory requirement does not exceed *available_gb*. Returns None when no
    smaller profile exists or none of them fits.
    """
    next_smaller = {"heavy": "default", "default": "minimal"}
    candidate = next_smaller.get(profile_name)
    while candidate is not None:
        settings = RESOURCE_PROFILES.get(candidate)
        if settings and parse_memory_string(settings["memory"]) <= available_gb:
            return candidate
        candidate = next_smaller.get(candidate)
    return None


def validate_memory_availability(
    required_gb: float,
    profile_name: str = "default",
    suggest_profile: Optional[str] = None,
) -> tuple[bool, str]:
    """Check if system has sufficient available memory.

    Args:
        required_gb: Required memory in GB
        profile_name: Name of current profile being used
        suggest_profile: Name of smaller profile to suggest. When None, the
            largest smaller built-in profile that fits in the available
            memory is chosen automatically (possibly none).

    Returns:
        Tuple of (is_sufficient, error_message). The message is empty when
        memory is sufficient or when available memory cannot be determined.
    """
    available_gb = get_available_memory_gb()

    if available_gb < 0:
        # Unable to determine available memory: do not block startup.
        return True, ""

    if available_gb >= required_gb:
        return True, ""

    if suggest_profile is None:
        # Only suggest a profile that would actually pass this same check.
        suggest_profile = _pick_smaller_profile(profile_name, available_gb)

    # An unknown explicit profile name falls back to the generic advice
    # instead of raising KeyError while building the message.
    suggested = RESOURCE_PROFILES.get(suggest_profile) if suggest_profile else None

    error = (
        "Insufficient memory available for container startup.\n"
        f"  Current profile: {profile_name}\n"
        f"  Required: {required_gb:.1f} GB\n"
        f"  Available: {available_gb:.1f} GB\n"
        f"  Short by: {required_gb - available_gb:.1f} GB\n\n"
    )

    if suggested:
        error += (
            "Suggested solutions:\n"
            "  1. Close other applications to free up memory\n"
            f"  2. Use a smaller profile: --profile {suggest_profile} "
            f"({suggested['cpus']} CPUs, {suggested['memory']} memory)\n"
            "  3. Skip memory check: --skip-memory-check (not recommended)"
        )
    else:
        error += (
            "Suggested solutions:\n"
            "  1. Close other applications to free up memory\n"
            "  2. Skip memory check: --skip-memory-check (not recommended)"
        )

    return False, error
177
+
44
178
 
45
179
  # =============================================================================
46
180
  # Update checking functions
@@ -756,7 +890,28 @@ Examples:
756
890
  "--memory",
757
891
  type=str,
758
892
  default=None,
759
- help="Container memory limit (e.g., 2g, 512m, 4096m). Default: no limit",
893
+ help="Container memory limit (e.g., 2g, 512m, 4096m). Overrides --profile setting",
894
+ )
895
+
896
+ parser.add_argument(
897
+ "--cpus",
898
+ type=int,
899
+ default=None,
900
+ help="Number of CPUs to allocate to container. Overrides --profile setting",
901
+ )
902
+
903
+ parser.add_argument(
904
+ "--profile",
905
+ type=str,
906
+ choices=["minimal", "default", "heavy"],
907
+ default="default",
908
+ help="Resource profile for container: minimal (4 CPU, 8GB), default (6 CPU, 12GB), heavy (8 CPU, 16GB)",
909
+ )
910
+
911
+ parser.add_argument(
912
+ "--skip-memory-check",
913
+ action="store_true",
914
+ help="Skip memory availability validation (not recommended)",
760
915
  )
761
916
 
762
917
  # Utility options
@@ -959,7 +1114,27 @@ def main() -> int:
959
1114
 
960
1115
  try:
961
1116
  if not args.quiet:
962
- print(f"Starting docling-serve container...")
1117
+ print(f"Starting docling-serve container...\n")
1118
+
1119
+ # Apply resource profile
1120
+ profile = RESOURCE_PROFILES[args.profile]
1121
+ cpus = args.cpus if args.cpus is not None else profile["cpus"]
1122
+ memory = args.memory if args.memory is not None else profile["memory"]
1123
+
1124
+ # Validate memory availability unless skipped
1125
+ if not args.skip_memory_check:
1126
+ required_gb = parse_memory_string(memory)
1127
+ is_sufficient, error_msg = validate_memory_availability(
1128
+ required_gb, profile_name=args.profile
1129
+ )
1130
+ if not is_sufficient:
1131
+ print(f"Error: {error_msg}", file=sys.stderr)
1132
+ return 1
1133
+
1134
+ if not args.quiet:
1135
+ print(f"Resource profile: {args.profile} ({cpus} CPUs, {memory} memory)")
1136
+ if args.cpus or args.memory:
1137
+ print(" (customized via command-line arguments)")
963
1138
  print()
964
1139
 
965
1140
  with DoclingContainer(
@@ -968,7 +1143,8 @@ def main() -> int:
968
1143
  args.port,
969
1144
  timeout=timeout,
970
1145
  keep_container=DEBUG,
971
- memory=args.memory,
1146
+ memory=memory,
1147
+ cpus=cpus,
972
1148
  ) as container:
973
1149
  # Convert files
974
1150
  conversion_start = time.time()
mdify/container.py CHANGED
@@ -28,6 +28,7 @@ class DoclingContainer:
28
28
  timeout: int = 1200,
29
29
  keep_container: bool = False,
30
30
  memory: Optional[str] = None,
31
+ cpus: Optional[int] = None,
31
32
  ):
32
33
  """Initialize container manager.
33
34
 
@@ -38,6 +39,7 @@ class DoclingContainer:
38
39
  timeout: Conversion timeout in seconds (default: 1200)
39
40
  keep_container: If True, do not auto-remove container (preserve logs)
40
41
  memory: Memory limit (e.g., "2g", "512m"). None for no limit.
42
+ cpus: Number of CPUs to allocate. None for no limit.
41
43
  """
42
44
  self.runtime = runtime
43
45
  self.image = image
@@ -45,6 +47,7 @@ class DoclingContainer:
45
47
  self.timeout = timeout
46
48
  self.keep_container = keep_container
47
49
  self.memory = memory
50
+ self.cpus = cpus
48
51
  self.container_name = f"mdify-serve-{uuid.uuid4().hex[:8]}"
49
52
  self.container_id: Optional[str] = None
50
53
 
@@ -114,7 +117,11 @@ class DoclingContainer:
114
117
  if not self.keep_container:
115
118
  cmd.insert(3, "--rm") # Auto-remove on stop
116
119
 
117
- # Add memory limit if specified
120
+ # Add resource limits if specified
121
+ if self.cpus:
122
+ cmd.insert(3, str(self.cpus))
123
+ cmd.insert(3, "--cpus")
124
+
118
125
  if self.memory:
119
126
  cmd.insert(3, self.memory)
120
127
  cmd.insert(3, "-m")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.11.7
3
+ Version: 2.11.9
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -0,0 +1,12 @@
1
+ assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
2
+ mdify/__init__.py,sha256=x2PxT1laVq9WFwgXBDy1TJ_qCOBN4cxlmLYbSBcb7qA,91
3
+ mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
4
+ mdify/cli.py,sha256=Mv3ClwC84fkorZgwM1IqGMvZ0-hT_V77qhHo2p0ueCU,49638
5
+ mdify/container.py,sha256=ARdFs-TOSh5vHGtBJ0CppfpZFaiprIuRdQ5wDH0NfrY,8377
6
+ mdify/docling_client.py,sha256=xuQR6sC1v3EPloOSwExoHCqT4uUxE8myYq-Yeby3C2I,7975
7
+ mdify_cli-2.11.9.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
8
+ mdify_cli-2.11.9.dist-info/METADATA,sha256=NHwtbgGo2CAPqZIOT7ebPk5mTwsmxRBo5pg0l71xenE,9623
9
+ mdify_cli-2.11.9.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
10
+ mdify_cli-2.11.9.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
11
+ mdify_cli-2.11.9.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
12
+ mdify_cli-2.11.9.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
2
- mdify/__init__.py,sha256=QROoTzZ7DPEkOz5xppOPbt6mrhz2S9R4qPalnU_JuXY,91
3
- mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
4
- mdify/cli.py,sha256=Bq6E-U-TMBDPPduHz6pOfFcVHPGSOhrwsvxVOTnA8KA,43261
5
- mdify/container.py,sha256=TEVfWXVQoF8OdMX3K_X460K4sJ59ysVOOy5z5E1RH84,8139
6
- mdify/docling_client.py,sha256=xuQR6sC1v3EPloOSwExoHCqT4uUxE8myYq-Yeby3C2I,7975
7
- mdify_cli-2.11.7.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
8
- mdify_cli-2.11.7.dist-info/METADATA,sha256=ksvj2cFwDeDb36tYVfUe3YpHp306Mt4QyON62voR9s0,9623
9
- mdify_cli-2.11.7.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
10
- mdify_cli-2.11.7.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
11
- mdify_cli-2.11.7.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
12
- mdify_cli-2.11.7.dist-info/RECORD,,