cal-docs-server 3.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,449 @@
1
+ # ----------------------------------------------------------------------------------------
2
+ # index_docs
3
+ # ----------
4
+ #
5
+ # Scans the document directories and produces an index
6
+ #
7
+ # License
8
+ # -------
9
+ # MIT License - Copyright 2025-2026 Cyber Assessment Labs
10
+ #
11
+ # Authors
12
+ # -------
13
+ # bena
14
+ #
15
+ # Version History
16
+ # ---------------
17
+ # Mar 2024 - Created
18
+ # Dec 2025 - New version 2
19
+ # ----------------------------------------------------------------------------------------
20
+
21
+ # ----------------------------------------------------------------------------------------
22
+ # Imports
23
+ # ----------------------------------------------------------------------------------------
24
+
25
+ import os
26
+ import re
27
+ import sys
28
+ from typing import Any
29
+ from typing import TypedDict
30
+ from typing import cast
31
+ import json5
32
+ import yaml
33
+ from . import async_file
34
+
35
+ # ----------------------------------------------------------------------------------------
36
+ # Types
37
+ # ----------------------------------------------------------------------------------------
38
+
39
+ IndexList = list["IndexItemDict"]
40
+
41
+
42
+ class IndexItemDict(TypedDict):
43
+ name: str
44
+ directory_name: str
45
+ description: str
46
+ logo_ref: str | None
47
+ versions: list[IndexVersionItem]
48
+ latest_version_dir: str | None
49
+
50
+
51
class IndexVersionItem(TypedDict):
    """
    Describes one versioned directory belonging to a package.
    """

    # The part of the directory name that follows "<package>-"
    version: str
    # Full name of the versioned directory
    directory_name: str
    # Site-relative URL of the versioned docs, in the form "/<directory_name>/"
    url: str
    # Site-relative download URL, in the form "/api/download/<package>/<version>"
    download_url: str
56
+
57
+
58
class DocInfoFormat(TypedDict):
    """
    The values this module takes from a package's docinfo file.
    """

    # Display name for the package; empty string when the file does not give one
    name: str
    # Description of the package; empty string when the file does not give one
    description: str
    # True when the file asks for the directory to be left out of the index
    noindex: bool
62
+
63
+
64
+ # ----------------------------------------------------------------------------------------
65
+ # Functions
66
+ # ----------------------------------------------------------------------------------------
67
+
68
+
69
+ # ----------------------------------------------------------------------------------------
70
async def make_index(root_directory: str) -> IndexList:
    """
    Builds the documentation index for everything found under `root_directory`.

    Every base (unversioned) directory, plus the base name of any versioned directory
    that has no base directory of its own, is treated as a candidate package.
    Candidates for which no package info can be produced are left out of the result.
    """

    every_dir = await _list_directories(root_directory)
    unversioned = await _filter_non_versioned_dirs(every_dir)

    # Base names of versioned directories that have no base directory ("orphans")
    orphans = await _find_orphaned_version_bases(every_dir, unversioned)

    # De-duplicate and order the candidate package names
    candidates = sorted({*unversioned, *orphans})

    result: IndexList = []
    for package_dir in candidates:
        # Versions are collected first because the package info is read from the
        # latest proper version directory when one exists
        package_versions = await _get_package_versions(
            root_directory, package_dir, every_dir
        )
        entry = await _get_package_info(root_directory, package_dir, package_versions)
        if not entry:
            continue

        entry["versions"] = package_versions
        # Latest proper (non-beta) version, if there is one
        entry["latest_version_dir"] = _get_latest_proper_version_dir(package_versions)
        result.append(entry)

    return result
100
+
101
+
102
+ # ----------------------------------------------------------------------------------------
103
def get_version_redirects(index_list: IndexList) -> dict[str, str]:
    """
    Maps each unversioned package directory to its latest version directory, for
    example {"calrep-sdk": "calrep-sdk-6.2.2"}.

    Packages without a latest version directory get no entry, since there is nothing
    to redirect to.
    """
    return {
        entry["directory_name"]: entry["latest_version_dir"]
        for entry in index_list
        if entry["latest_version_dir"]
    }
114
+
115
+
116
+ # ----------------------------------------------------------------------------------------
117
+ # Private Functions
118
+ # ----------------------------------------------------------------------------------------
119
+
120
+
121
+ # ----------------------------------------------------------------------------------------
122
async def _list_directories(directory: str) -> list[str]:
    """
    Lists the names of the directories directly inside `directory` (no recursion).

    Entries whose name starts with `.` are skipped. If `directory` cannot be listed
    the error is reported on stderr and an empty list is returned.
    """

    try:
        entries: list[str] = os.listdir(directory)
    except OSError as e:
        print(f"Error listing directory {directory}: {e}", file=sys.stderr)
        return []

    return [
        entry
        for entry in entries
        if not entry.startswith(".")
        and os.path.isdir(os.path.join(directory, entry))
    ]
142
+
143
+
144
+ # ----------------------------------------------------------------------------------------
145
async def _filter_non_versioned_dirs(directories: list[str]) -> list[str]:
    """
    Returns, in sorted order, the names from `directories` that are base names, i.e.
    names that do not carry a version suffix such as `-1.2.3`.
    """

    # A name is versioned when either pattern is found in it
    version_suffix_patterns = (
        re.compile(r"-\d+[.\-+]?$"),
        re.compile(r"-\d+[.\-+].*$"),
    )

    def looks_versioned(candidate: str) -> bool:
        return any(pattern.search(candidate) for pattern in version_suffix_patterns)

    return sorted(
        candidate for candidate in directories if not looks_versioned(candidate)
    )
163
+
164
+
165
+ # ----------------------------------------------------------------------------------------
166
async def _find_orphaned_version_bases(
    all_dirs: list[str], base_dirs: list[str]
) -> list[str]:
    """
    Finds the base names of "orphaned" versioned directories - directories that carry
    a version number but have no corresponding base directory. For example, given
    "pkg-1.0.0" and "pkg-2.0.0" but no "pkg", this returns ["pkg"] so that an index
    entry can still be created for the package.

    `all_dirs` is every directory name to inspect and `base_dirs` the names already
    known to be base directories. The result is sorted and free of duplicates.
    """

    # The version part starts at the first "-<digits>" that is followed either by a
    # separator or by the end of the name. Accepting the end of the name matters for
    # single-number versions such as "pkg-2": they count as versioned directories,
    # so their base name has to be extractable here as well.
    version_start = re.compile(r"-\d+(?:[.\-+]|$)")

    # Set membership keeps the lookup cheap when there are many directories
    known_bases = set(base_dirs)

    orphaned_bases: set[str] = set()
    for dir_name in all_dirs:
        match = version_start.search(dir_name)
        if match is None:
            # Not a versioned directory
            continue

        # Everything before the version part is the base name
        base_name = dir_name[: match.start()]

        # A name such as "-1.0" has no base part, so there is no package to report
        if base_name and base_name not in known_bases:
            orphaned_bases.add(base_name)

    return sorted(orphaned_bases)
194
+
195
+
196
+ # ----------------------------------------------------------------------------------------
197
async def _get_package_info(
    root_dir: str, dir: str, versions: list[IndexVersionItem]
) -> IndexItemDict | None:
    """
    Fills out an IndexItemDict for the package `dir` located under `root_dir`.

    Metadata (index page, logo, docinfo file) is read from the latest proper version
    directory when `versions` contains one, otherwise from the unversioned base
    directory `root_dir/dir`.

    Returns None when the metadata directory has neither an `index.html` nor an
    `index.md`, when the docinfo file sets `noindex`, or when processing the directory
    fails. A docinfo file that cannot be processed is reported on stderr and otherwise
    ignored.

    Note: the `versions` and `latest_version_dir` fields are left at their empty
    defaults; the caller is expected to fill them in.
    """

    try:
        # Prefer the latest proper version directory as the source of metadata
        latest_version_dir = _get_latest_proper_version_dir(versions)
        metadata_dir = os.path.join(root_dir, latest_version_dir or dir)

        has_index_page = os.path.isfile(
            os.path.join(metadata_dir, "index.html")
        ) or os.path.isfile(os.path.join(metadata_dir, "index.md"))
        if not has_index_page:
            # Without an index page this is not treated as a package
            return None

        index_item: IndexItemDict = {
            "name": dir,
            "directory_name": dir,
            "description": "",
            "logo_ref": await _get_logo_ref(metadata_dir),
            "versions": [],
            "latest_version_dir": None,
        }

        try:
            doc_info = await _read_docinfo_file(metadata_dir)
            if doc_info:
                if doc_info["noindex"]:
                    # Special case: we have been asked not to index this package
                    return None
                if doc_info["name"]:
                    index_item["name"] = doc_info["name"]
                if doc_info["description"]:
                    index_item["description"] = doc_info["description"]
        except Exception:
            # Best effort: a broken docinfo file must not hide the package itself
            print(
                f"Failed trying to process docinfo file for {metadata_dir}",
                file=sys.stderr,
            )

        return index_item

    except Exception:
        # Best effort: skip this directory. The message goes to stderr because stdout
        # may be carrying the generated JSON/HTML index.
        print(f"Failed to process package dir {dir}", file=sys.stderr)
        return None
261
+
262
+
263
+ # ----------------------------------------------------------------------------------------
264
async def _read_docinfo_file(dir_path: str) -> DocInfoFormat | None:
    """
    Reads the document info file from `dir_path`. The first of these that exists is
    the one used:
      - "docinfo.json"
      - "docinfo.json5"
      - "docinfo.yml"
      - "docinfo.yaml"

    Returns the `name`, `description` and `noindex` values from the file (missing
    values become "" / False). Returns None when no file exists, when the file holds
    no data, or when its content does not have the expected form.
    """

    # Candidate file names in priority order, each paired with its parser family
    candidates = (
        ("docinfo.json", "json"),
        ("docinfo.json5", "json"),
        ("docinfo.yml", "yaml"),
        ("docinfo.yaml", "yaml"),
    )

    parsed: dict[str, Any] | None = None
    for candidate_name, parser_kind in candidates:
        candidate_path = os.path.join(dir_path, candidate_name)
        if not os.path.isfile(candidate_path):
            continue

        contents = await _read_text_file(candidate_path)
        if parser_kind == "json":
            parsed = cast("dict[str, Any]", json5.loads(contents))
        elif parser_kind == "yaml":
            parsed = yaml.safe_load(contents)
        else:
            raise RuntimeError("Internal Error. Invalid value in constant list")

        # Only the first file found is consulted
        break

    if not parsed:
        return None

    try:
        # Pick out the values we are interested in, where they exist
        return {
            "name": parsed.get("name", ""),
            "description": parsed.get("description", ""),
            "noindex": bool(parsed.get("noindex", False)),
        }
    except Exception:
        # Not the format we expected, so disregard the file
        return None
315
+
316
+
317
+ # ----------------------------------------------------------------------------------------
318
async def _read_text_file(file_path: str) -> str:
    """
    Returns the full text content of the file at `file_path`
    """

    async with async_file.open_text(file_path, "r") as handle:
        return await handle.read()
326
+
327
+
328
+ # ----------------------------------------------------------------------------------------
329
+ def _split_to_parts(string: str) -> list[str | int]:
330
+ """
331
+ Splits string into a list of parts where each part is either a string or a number
332
+ """
333
+ return [
334
+ int(text) if text.isdigit() else text.lower()
335
+ for text in re.split(r"(\d+)", string)
336
+ ]
337
+
338
+
339
+ # ----------------------------------------------------------------------------------------
340
def _is_proper_version(version_str: str) -> bool:
    """
    Checks if a version string is a proper release version (e.g., "1.2.3").

    Returns False for anything containing a letter, which covers beta, alpha, rc, dev
    and other pre-release markers such as "1.2.0b1", and for anything containing "+",
    which typically indicates build metadata. Also returns False for strings without
    any digit (including the empty string), since those are not versions at all.
    """
    # "+" typically introduces build metadata
    if "+" in version_str:
        return False

    # Any letter marks a pre-release or otherwise non-numeric version
    # ("alpha", "beta", "rc", "dev", "pre", "b1", ...)
    if re.search(r"[a-zA-Z]", version_str):
        return False

    # What remains must actually contain a number to be a version
    return re.search(r"\d", version_str) is not None
364
+
365
+
366
+ # ----------------------------------------------------------------------------------------
367
def _get_latest_proper_version_dir(versions: list[IndexVersionItem]) -> str | None:
    """
    Returns the `directory_name` of the highest proper version (non-beta, non-alpha,
    etc.) in `versions`, or None when the list holds no proper version.
    """
    candidates = [item for item in versions if _is_proper_version(item["version"])]
    if not candidates:
        return None

    # Highest version number wins; of equally ranked entries the earliest is kept
    newest = max(candidates, key=lambda item: _split_to_parts(item["version"]))
    return newest["directory_name"]
383
+
384
+
385
+ # ----------------------------------------------------------------------------------------
386
async def _get_package_versions(
    root_dir: str, package_name: str, folder_names: list[str]
) -> list[IndexVersionItem]:
    """
    Looks through `folder_names` for the versioned folders of `package_name` (folders
    named "<package_name>-<version>") and returns them, highest version first.

    Folders whose docinfo file sets the noindex flag are left out. `root_dir` is the
    directory that contains the folders.
    """

    prefix = package_name + "-"
    versions: list[IndexVersionItem] = []

    for dir_name in folder_names:
        if not dir_name.startswith(prefix):
            continue

        extra = dir_name[len(prefix) :]

        # A version suffix starts with a number. Without this check any other package
        # that shares the name prefix would be listed as a version of this one, e.g.
        # "calrep-sdk" and "calrep-sdk-6.2.2" as versions of "calrep".
        if not extra[:1].isdecimal():
            continue

        # Check to see if it has a noindex flag
        if await _does_dir_have_noindex(os.path.join(root_dir, dir_name)):
            continue

        versions.append(
            {
                "version": extra,
                "directory_name": dir_name,
                "url": f"/{dir_name}/",
                "download_url": f"/api/download/{package_name}/{extra}",
            }
        )

    versions.sort(key=lambda x: _split_to_parts(x["version"]), reverse=True)
    return versions
410
+
411
+
412
+ # ----------------------------------------------------------------------------------------
413
async def _does_dir_have_noindex(dir: str) -> bool:
    """
    Checks in the directory for the docinfo file and sees if it has the noindex flag.

    Returns False when there is no docinfo file. A docinfo file that cannot be read or
    parsed is reported on stderr and treated as not having the flag, so that a single
    broken file cannot abort the indexing of every package.
    """

    try:
        info = await _read_docinfo_file(dir)
    except Exception:
        # Best effort, in line with how package info handles a broken docinfo file
        print(f"Failed trying to process docinfo file for {dir}", file=sys.stderr)
        return False

    return bool(info and info.get("noindex", False))
422
+
423
+
424
+ # ----------------------------------------------------------------------------------------
425
+ async def _get_logo_ref(dir: str) -> str | None:
426
+ """
427
+ Looks for the file `docinfo.png` in the directory and if it exists returns an image
428
+ source ref for it. Otherwise None
429
+ """
430
+
431
+ logo_file = os.path.join(dir, "docinfo.png")
432
+ if os.path.isfile(logo_file):
433
+ base_name = os.path.basename(dir)
434
+ ref = f"{base_name}/docinfo.png"
435
+ return ref
436
+
437
+ logo_file = os.path.join(dir, "docinfo.jpg")
438
+ if os.path.isfile(logo_file):
439
+ base_name = os.path.basename(dir)
440
+ ref = f"{base_name}/docinfo.jpg"
441
+ return ref
442
+
443
+ logo_file = os.path.join(dir, "docinfo.svg")
444
+ if os.path.isfile(logo_file):
445
+ base_name = os.path.basename(dir)
446
+ ref = f"{base_name}/docinfo.svg"
447
+ return ref
448
+
449
+ return None
@@ -0,0 +1,191 @@
1
+ # ----------------------------------------------------------------------------------------
2
+ # main
3
+ # ----
4
+ #
5
+ # Starting point for the asyncio
6
+ #
7
+ # License
8
+ # -------
9
+ # MIT License - Copyright 2025-2026 Cyber Assessment Labs
10
+ #
11
+ # Authors
12
+ # -------
13
+ # bena
14
+ #
15
+ # Version History
16
+ # ---------------
17
+ # Mar 2024 - Created
18
+ # Dec 2025 - New version 2
19
+ # ----------------------------------------------------------------------------------------
20
+
21
+ # ----------------------------------------------------------------------------------------
22
+ # Imports
23
+ # ----------------------------------------------------------------------------------------
24
+
25
+ import argparse
26
+ import asyncio
27
+ import json
28
+ import os
29
+ import sys
30
+ from typing import Any
31
+ from . import index_docs
32
+ from . import render_index
33
+ from . import version
34
+ from . import web_server
35
+
36
+ # ----------------------------------------------------------------------------------------
37
+ # Constants
38
+ # ----------------------------------------------------------------------------------------
39
+
40
# Default config file location in the form shown to the user in the help text
DEFAULT_CONFIG_DISPLAY = "~/.config/cal-docs-server/config.json"
# The same location with "~" expanded to the user's home directory
DEFAULT_CONFIG_PATH = os.path.expanduser(DEFAULT_CONFIG_DISPLAY)
42
+
43
+ # ----------------------------------------------------------------------------------------
44
+ # Functions
45
+ # ----------------------------------------------------------------------------------------
46
+
47
+
48
+ # ----------------------------------------------------------------------------------------
49
+ def _load_config(config_path: str | None) -> dict[str, Any]:
50
+ """
51
+ Loads configuration from a JSON file.
52
+ If config_path is None, tries to load from DEFAULT_CONFIG_PATH.
53
+ Returns empty dict if file doesn't exist (only for default path).
54
+ """
55
+
56
+ path = config_path or DEFAULT_CONFIG_PATH
57
+ is_explicit = config_path is not None
58
+
59
+ if not os.path.isfile(path):
60
+ if is_explicit:
61
+ print(f"Error: Config file not found: {path}", file=sys.stderr)
62
+ sys.exit(1)
63
+ return {}
64
+
65
+ try:
66
+ with open(path) as f:
67
+ config: dict[str, Any] = json.load(f)
68
+ return config
69
+ except json.JSONDecodeError as e:
70
+ print(f"Error: Invalid JSON in config file {path}: {e}", file=sys.stderr)
71
+ sys.exit(1)
72
+
73
+
74
+ # ----------------------------------------------------------------------------------------
75
async def main(argv: list[str]) -> int:
    """
    Program entry point.

    Parses `argv`, merges it with the JSON config file (command line values take
    precedence) and then does one of three things: prints the documentation index as
    JSON (`--json`), prints the index page HTML (`--html`), or launches the web
    server. The two print modes return 0; in server mode the function keeps running
    and does not return.
    """

    parser = argparse.ArgumentParser(
        prog="cal-docs-server",
        description=(
            "CAL Documentation Server - A lightweight web server for hosting "
            "versioned documentation with automatic indexing, markdown support, "
            "and a REST API for programmatic access and uploads."
        ),
        epilog=(
            "Configuration file: Options can be specified in a JSON config file. The"
            f" server looks for {DEFAULT_CONFIG_DISPLAY} by default. Use --config to"
            " specify a different file. CLI arguments override config file values."
        ),
    )

    # Command line options as (flags, keyword arguments), in the order they are
    # registered with the parser
    option_specs: list[tuple[tuple[str, ...], dict[str, Any]]] = [
        (
            ("--version",),
            {
                "action": "version",
                "version": f"cal-docs-server {version.VERSION_STR}",
            },
        ),
        (
            ("--config", "-c"),
            {
                "metavar": "PATH",
                "help": (
                    f"path to JSON config file (default: {DEFAULT_CONFIG_DISPLAY}). "
                    "Config file options: docs, port, name, tokens, base_url"
                ),
            },
        ),
        (
            ("--port", "-p"),
            {
                "type": int,
                "metavar": "PORT",
                "help": "port to listen on (default: 80)",
            },
        ),
        (
            ("--docs", "-d"),
            {
                "metavar": "PATH",
                "help": "path to documentation root directory (required)",
            },
        ),
        (
            ("--name", "-n"),
            {
                "metavar": "NAME",
                "help": (
                    "server name displayed on index page (default: Documents Server)"
                ),
            },
        ),
        (
            ("--json", "-j"),
            {
                "action": "store_true",
                "help": (
                    "output documentation index as JSON to stdout instead of running"
                    " server"
                ),
            },
        ),
        (
            ("--html", "-H"),
            {
                "action": "store_true",
                "help": "output index page HTML to stdout instead of running server",
            },
        ),
        (
            ("--tokens", "-t"),
            {
                "metavar": "PATH",
                "help": (
                    "path to JSON file containing API tokens for upload authentication"
                ),
            },
        ),
        (
            ("--base-url", "-b"),
            {
                "metavar": "URL",
                "help": (
                    "base URL for links in API responses (e.g.,"
                    " https://docs.example.com)"
                ),
            },
        ),
    ]
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    args = parser.parse_args(argv)

    # Config file values (default or specified file) are the fallback for anything
    # not given on the command line
    config = _load_config(args.config)

    docs = args.docs or config.get("docs")
    port = config.get("port", 80) if args.port is None else args.port
    name = args.name or config.get("name", "Documents Server")
    tokens = args.tokens or config.get("tokens")
    base_url = args.base_url or config.get("base_url")

    # Expand ~ in paths
    if docs:
        docs = os.path.expanduser(docs)
    if tokens:
        tokens = os.path.expanduser(tokens)

    # The docs root is the only required option
    if not docs:
        parser.error("--docs is required (either via CLI or config file)")

    if args.json:
        print(json.dumps(await index_docs.make_index(docs), indent=2))
        return 0

    if args.html:
        print(await render_index.render_index(docs, server_name=name))
        return 0

    await web_server.launch_web_server(
        root_directory=docs,
        port=port,
        server_name=name,
        tokens_file=tokens,
        base_url=base_url,
    )

    # Continue until terminated
    while True:
        await asyncio.sleep(1)