nutricare-data-packages 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,59 @@
1
+ Metadata-Version: 2.4
2
+ Name: nutricare-data-packages
3
+ Version: 0.1.0
4
+ Summary: Nutricare data access package
5
+ Author: Nutricare Team
6
+ Project-URL: Homepage, https://example.com
7
+ Project-URL: Repository, https://example.com/nutricare-data-packages
8
+ Requires-Python: >=3.9
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: pymongo>=4.0.0
11
+
12
+ # nutricare-data-packages
13
+
14
+ Nutricare 的数据访问基础包,封装 MongoDB 仓储与本地存储抽象基类。
15
+
16
+ ## 安装
17
+
18
+ ```bash
19
+ pip install nutricare-data-packages
20
+ ```
21
+
22
+ ## 快速使用
23
+
24
+ ```python
25
+ from pathlib import Path
26
+
27
+ from nutricare_data_packages import MetadataStore, StorageBase
28
+
29
+ store = MetadataStore(
30
+ mongodb_url="mongodb://localhost:27017",
31
+ db_name="nutricare",
32
+ auth_source="admin",
33
+ )
34
+
35
+ class ReportStorage(StorageBase):
36
+ @property
37
+ def storage_subdir(self) -> str:
38
+ return "reports"
39
+
40
+ def _write_impl(self, relative_path: str, data: bytes) -> Path:
41
+ target = self.resolve_path(relative_path)
42
+ target.parent.mkdir(parents=True, exist_ok=True)
43
+ target.write_bytes(data)
44
+ return target
45
+ ```
46
+
47
+ ## 本地构建与发布
48
+
49
+ ```bash
50
+ python -m pip install --upgrade build twine
51
+ python -m build
52
+ python -m twine check dist/*
53
+ python -m twine upload dist/*
54
+ ```
55
+
56
+ ## 说明
57
+
58
+ - 包名(pip 安装名):`nutricare-data-packages`
59
+ - 导入名(Python import):`nutricare_data_packages`
@@ -0,0 +1,48 @@
1
+ # nutricare-data-packages
2
+
3
+ Nutricare 的数据访问基础包,封装 MongoDB 仓储与本地存储抽象基类。
4
+
5
+ ## 安装
6
+
7
+ ```bash
8
+ pip install nutricare-data-packages
9
+ ```
10
+
11
+ ## 快速使用
12
+
13
+ ```python
14
+ from pathlib import Path
15
+
16
+ from nutricare_data_packages import MetadataStore, StorageBase
17
+
18
+ store = MetadataStore(
19
+ mongodb_url="mongodb://localhost:27017",
20
+ db_name="nutricare",
21
+ auth_source="admin",
22
+ )
23
+
24
+ class ReportStorage(StorageBase):
25
+ @property
26
+ def storage_subdir(self) -> str:
27
+ return "reports"
28
+
29
+ def _write_impl(self, relative_path: str, data: bytes) -> Path:
30
+ target = self.resolve_path(relative_path)
31
+ target.parent.mkdir(parents=True, exist_ok=True)
32
+ target.write_bytes(data)
33
+ return target
34
+ ```
35
+
36
+ ## 本地构建与发布
37
+
38
+ ```bash
39
+ python -m pip install --upgrade build twine
40
+ python -m build
41
+ python -m twine check dist/*
42
+ python -m twine upload dist/*
43
+ ```
44
+
45
+ ## 说明
46
+
47
+ - 包名(pip 安装名):`nutricare-data-packages`
48
+ - 导入名(Python import):`nutricare_data_packages`
@@ -0,0 +1,47 @@
1
+ """Nutricare data packages."""
2
+
3
+ from .metadata_storage import (
4
+ INDEX_KEYS,
5
+ KEY_CONTENT_HASH,
6
+ KEY_CREATED_AT,
7
+ KEY_MD5,
8
+ KEY_METADATA,
9
+ KEY_MIME_TYPE,
10
+ KEY_PHASH,
11
+ KEY_RELATIVE_PATH,
12
+ KEY_SIZE,
13
+ KEY_STATUS,
14
+ KEY_STORAGE_SUBDIR,
15
+ KEY_TASK_ID,
16
+ KEY_TICKET_ID,
17
+ KEY_UPDATED_AT,
18
+ METADATA_COLLECTION,
19
+ MetadataStore,
20
+ make_document,
21
+ make_filter,
22
+ )
23
+ from .mongo_repository_base import MongoRepositoryBase
24
+ from .storage_base import StorageBase
25
+
26
# Public API of the package: the names exported by
# ``from nutricare_data_packages import *``.
__all__ = [
    "MongoRepositoryBase",
    "MetadataStore",
    "StorageBase",
    "METADATA_COLLECTION",
    "INDEX_KEYS",
    "KEY_STORAGE_SUBDIR",
    "KEY_RELATIVE_PATH",
    "KEY_TICKET_ID",
    "KEY_TASK_ID",
    "KEY_SIZE",
    "KEY_CONTENT_HASH",
    "KEY_MD5",
    "KEY_PHASH",
    "KEY_MIME_TYPE",
    "KEY_STATUS",
    "KEY_METADATA",
    "KEY_CREATED_AT",
    "KEY_UPDATED_AT",
    "make_document",
    "make_filter",
]
@@ -0,0 +1,237 @@
1
+ # 元数据存储 - 定义 doc_metadata 表结构、存储与更新方法
2
+
3
+ from datetime import datetime, timezone
4
+ from typing import Any, Optional
5
+
6
+ from pymongo.collection import Collection
7
+ from pymongo.cursor import Cursor
8
+ from pymongo.results import DeleteResult, InsertOneResult, UpdateResult
9
+
10
+ from .mongo_repository_base import MongoRepositoryBase
11
+
12
# ---------------------------------------------------------------------------
# Collection name and document schema: field-name constants
# ---------------------------------------------------------------------------

# Name of the MongoDB collection that holds the metadata documents.
METADATA_COLLECTION = "doc_metadata"

# Logical unique key: storage sub-directory + relative path (used as the
# filter when saving, looking up and deleting a document).
KEY_STORAGE_SUBDIR = "storage_subdir"
KEY_RELATIVE_PATH = "relative_path"

# Remaining document fields.
KEY_TICKET_ID = "ticket_id"
KEY_TASK_ID = "task_id"
KEY_SIZE = "size"
KEY_CONTENT_HASH = "content_hash"
KEY_MD5 = "md5"
KEY_PHASH = "phash"
KEY_MIME_TYPE = "mime_type"
KEY_STATUS = "status"
KEY_METADATA = "metadata"
KEY_CREATED_AT = "created_at"
KEY_UPDATED_AT = "updated_at"

# Suggested unique index: (storage_subdir, relative_path).
INDEX_KEYS = [KEY_STORAGE_SUBDIR, KEY_RELATIVE_PATH]
38
+
39
+
40
+ def _now_utc() -> datetime:
41
+ """当前 UTC 时间。"""
42
+ return datetime.now(timezone.utc)
43
+
44
+
45
def make_document(
    *,
    storage_subdir: str,
    relative_path: str,
    size: Optional[int] = None,
    content_hash: Optional[str] = None,
    md5: Optional[str] = None,
    phash: Optional[str] = None,
    mime_type: Optional[str] = None,
    ticket_id: Optional[str] = None,
    task_id: Optional[str] = None,
    status: Optional[str] = None,
    metadata: Optional[dict[str, Any]] = None,
    created_at: Optional[datetime] = None,
    updated_at: Optional[datetime] = None,
) -> dict[str, Any]:
    """
    Build a document that follows the metadata schema (without ``_id``).

    ``storage_subdir`` is stripped; ``relative_path`` has backslashes turned
    into forward slashes, is stripped, and loses any leading slashes.
    ``created_at`` / ``updated_at`` default to the current UTC time.

    Optional text fields are stored only when they are not ``None`` and not
    blank after stripping. ``size`` is stored whenever it is not ``None``;
    ``metadata`` is stored only when it is a ``dict``.

    :return: the new document as a plain ``dict``
    """
    now = _now_utc()
    doc: dict[str, Any] = {
        KEY_STORAGE_SUBDIR: storage_subdir.strip(),
        KEY_RELATIVE_PATH: relative_path.replace("\\", "/").strip().lstrip("/"),
        KEY_CREATED_AT: created_at or now,
        KEY_UPDATED_AT: updated_at or now,
    }
    if size is not None:
        doc[KEY_SIZE] = size

    # Same insertion order as the schema constants above.
    optional_text_fields = (
        (KEY_CONTENT_HASH, content_hash),
        (KEY_MD5, md5),
        (KEY_PHASH, phash),
        (KEY_MIME_TYPE, mime_type),
        (KEY_TICKET_ID, ticket_id),
        (KEY_TASK_ID, task_id),
        (KEY_STATUS, status),
    )
    for key, raw in optional_text_fields:
        if raw is None:
            continue
        # Normalise through str() exactly once: the value that is checked for
        # blankness is the value that gets stored, so a non-str input (for
        # example an integer ticket id) no longer fails on ``.strip()``.
        text = str(raw).strip()
        if text:
            doc[key] = text

    if isinstance(metadata, dict):
        doc[KEY_METADATA] = metadata
    return doc
91
+
92
+
93
def make_filter(storage_subdir: str, relative_path: str) -> dict[str, Any]:
    """
    Build the query filter that identifies one document by storage
    sub-directory and relative path.

    The sub-directory is stripped; the path has backslashes replaced by
    forward slashes, is stripped, and loses any leading slashes.
    """
    subdir = storage_subdir.strip()
    normalized_path = relative_path.replace("\\", "/").strip().lstrip("/")
    return {KEY_STORAGE_SUBDIR: subdir, KEY_RELATIVE_PATH: normalized_path}
99
+
100
+
101
class MetadataStore:
    """
    Metadata store: CRUD helpers on top of the METADATA_COLLECTION collection.

    A MongoRepositoryBase is created internally from the connection
    parameters; its collection is used for every operation.
    """

    def __init__(
        self,
        *,
        mongodb_url: str,
        db_name: str,
        auth_source: Optional[str] = None,
    ) -> None:
        """
        :param mongodb_url: MongoDB connection string (must not be blank)
        :param db_name: database name (must not be blank)
        :param auth_source: optional authentication database
        :raises ValueError: if ``mongodb_url`` or ``db_name`` is empty/blank
        """
        if not mongodb_url or not str(mongodb_url).strip():
            raise ValueError("mongodb_url 不能为空")
        if not db_name or not str(db_name).strip():
            raise ValueError("db_name 不能为空")
        self._repo = MongoRepositoryBase(
            mongodb_url=mongodb_url.strip(),
            db_name=db_name.strip(),
            collection_name=METADATA_COLLECTION,
            auth_source=auth_source,
        )
        self._coll = self._repo.collection

    @property
    def collection(self) -> Collection:
        """The underlying collection."""
        return self._coll

    # The return annotation is quoted on purpose: ``A | B`` between classes is
    # evaluated when the function is defined and raises TypeError on
    # Python 3.9, which this package declares as supported.
    def save(
        self,
        *,
        storage_subdir: str,
        relative_path: str,
        size: Optional[int] = None,
        content_hash: Optional[str] = None,
        md5: Optional[str] = None,
        phash: Optional[str] = None,
        mime_type: Optional[str] = None,
        ticket_id: Optional[str] = None,
        task_id: Optional[str] = None,
        status: Optional[str] = None,
        metadata: Optional[dict[str, Any]] = None,
    ) -> "InsertOneResult | UpdateResult":
        """
        Store one metadata document.

        If a document with the same (storage_subdir, relative_path) exists it
        is updated (``created_at`` is left untouched, ``updated_at`` is
        refreshed); otherwise a new document is inserted with both timestamps.

        :return: ``UpdateResult`` when an existing document was updated,
            ``InsertOneResult`` when a new one was inserted
        """
        doc = make_document(
            storage_subdir=storage_subdir,
            relative_path=relative_path,
            size=size,
            content_hash=content_hash,
            md5=md5,
            phash=phash,
            mime_type=mime_type,
            ticket_id=ticket_id,
            task_id=task_id,
            status=status,
            metadata=metadata,
        )
        flt = make_filter(storage_subdir, relative_path)
        # NOTE: lookup and write are two separate round trips, so this is not
        # atomic; nothing here creates the suggested unique index either.
        if self._coll.find_one(flt):
            # ``doc`` already carries a fresh updated_at from make_document;
            # only created_at must be kept as stored.
            changes = {k: v for k, v in doc.items() if k != KEY_CREATED_AT}
            return self._coll.update_one(flt, {"$set": changes})
        return self._coll.insert_one(doc)

    def update_by_path(
        self,
        storage_subdir: str,
        relative_path: str,
        *,
        size_bytes: Optional[int] = None,
        content_hash: Optional[str] = None,
        md5: Optional[str] = None,
        phash: Optional[str] = None,
        mime_type: Optional[str] = None,
        ticket_id: Optional[str] = None,
        task_id: Optional[str] = None,
        status: Optional[str] = None,
        metadata: Optional[dict[str, Any]] = None,
        size: Optional[int] = None,
    ) -> UpdateResult:
        """
        Partially update the document identified by
        (storage_subdir, relative_path).

        Only arguments that are not ``None`` are written; ``updated_at`` is
        always refreshed. A text argument that is falsy (e.g. ``""``) is
        written as ``None``.

        :param size_bytes: new value for the ``size`` field
        :param size: alias of ``size_bytes`` matching the keyword used by
            :meth:`save`; ``size_bytes`` wins when both are given
        """
        flt = make_filter(storage_subdir, relative_path)
        set_payload: dict[str, Any] = {KEY_UPDATED_AT: _now_utc()}

        new_size = size_bytes if size_bytes is not None else size
        if new_size is not None:
            set_payload[KEY_SIZE] = new_size

        text_fields = (
            (KEY_CONTENT_HASH, content_hash),
            (KEY_MD5, md5),
            (KEY_PHASH, phash),
            (KEY_MIME_TYPE, mime_type),
            (KEY_TICKET_ID, ticket_id),
            (KEY_TASK_ID, task_id),
            (KEY_STATUS, status),
        )
        for key, raw in text_fields:
            if raw is not None:
                # str() keeps non-str inputs from failing on ``.strip()``.
                set_payload[key] = str(raw).strip() if raw else None

        if metadata is not None:
            set_payload[KEY_METADATA] = metadata
        return self._coll.update_one(flt, {"$set": set_payload})

    def update_one(
        self, filter: dict[str, Any], update: dict[str, Any], **kwargs: Any
    ) -> UpdateResult:
        """Pass straight through to the collection's ``update_one`` for custom updates."""
        return self._coll.update_one(filter, update, **kwargs)

    def find_by_path(
        self, storage_subdir: str, relative_path: str
    ) -> Optional[dict[str, Any]]:
        """Return the document for (storage_subdir, relative_path), or ``None``."""
        return self._coll.find_one(make_filter(storage_subdir, relative_path))

    def list_by_storage_subdir(
        self, storage_subdir: str, **kwargs: Any
    ) -> Cursor[dict[str, Any]]:
        """Return a cursor over every document of one storage sub-directory."""
        return self._coll.find({KEY_STORAGE_SUBDIR: storage_subdir.strip()}, **kwargs)

    def delete_by_path(
        self, storage_subdir: str, relative_path: str
    ) -> DeleteResult:
        """Delete the document identified by (storage_subdir, relative_path)."""
        return self._coll.delete_one(make_filter(storage_subdir, relative_path))
@@ -0,0 +1,98 @@
1
+ # MongoDB 连接与数据库访问(基类:子类需传入连接参数与集合名)
2
+
3
+ from typing import Any, Optional
4
+
5
+ from pymongo import MongoClient
6
+ from pymongo.collection import Collection
7
+ from pymongo.cursor import Cursor
8
+ from pymongo.results import DeleteResult, InsertOneResult, InsertManyResult, UpdateResult
9
+
10
+
11
class MongoRepositoryBase:
    """
    Base class for access to one MongoDB collection.

    Instances must be created with:
    - connection parameters: ``mongodb_url``, ``db_name`` and, optionally,
      ``auth_source``
    - the collection name: ``collection_name``

    The instance owns its ``MongoClient``. Call :meth:`close` (or use the
    instance as a context manager) to release the client when done.

    Example:
        class MyRepo(MongoRepositoryBase):
            def __init__(self):
                super().__init__(
                    mongodb_url="mongodb://localhost:27017",
                    db_name="mydb",
                    collection_name="my_coll",
                    auth_source="admin",  # optional
                )
    """

    def __init__(
        self,
        *,
        mongodb_url: str,
        db_name: str,
        collection_name: str,
        auth_source: Optional[str] = None,
    ) -> None:
        """
        :raises ValueError: if ``mongodb_url``, ``db_name`` or
            ``collection_name`` is empty/blank
        """
        if not mongodb_url or not str(mongodb_url).strip():
            raise ValueError("mongodb_url 不能为空")
        if not db_name or not str(db_name).strip():
            raise ValueError("db_name 不能为空")
        if not collection_name or not str(collection_name).strip():
            raise ValueError("collection_name 不能为空")
        kwargs: dict[str, Any] = {}
        # authSource is only forwarded when a non-blank value was given.
        if auth_source and str(auth_source).strip():
            kwargs["authSource"] = auth_source.strip()
        self._client = MongoClient(mongodb_url.strip(), **kwargs)
        self._db = self._client[db_name.strip()]
        self._coll = self._db[collection_name.strip()]

    @property
    def collection(self) -> Collection:
        """The collection this repository operates on."""
        return self._coll

    def close(self) -> None:
        """Close the underlying ``MongoClient``."""
        self._client.close()

    def __enter__(self) -> "MongoRepositoryBase":
        """Return the repository itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        """Close the client on leaving the ``with`` block; exceptions propagate."""
        self.close()

    def find(self, filter: Optional[dict[str, Any]] = None, **kwargs: Any) -> Cursor:
        """Query many documents; a ``None`` filter matches everything."""
        return self._coll.find(filter or {}, **kwargs)

    def find_one(
        self, filter: Optional[dict[str, Any]] = None, **kwargs: Any
    ) -> Optional[dict[str, Any]]:
        """Query a single document."""
        return self._coll.find_one(filter or {}, **kwargs)

    def insert_one(self, document: dict[str, Any], **kwargs: Any) -> InsertOneResult:
        """Insert one document."""
        return self._coll.insert_one(document, **kwargs)

    def insert_many(
        self, documents: list[dict[str, Any]], **kwargs: Any
    ) -> InsertManyResult:
        """Insert several documents."""
        return self._coll.insert_many(documents, **kwargs)

    def update_one(
        self, filter: dict[str, Any], update: dict[str, Any], **kwargs: Any
    ) -> UpdateResult:
        """Update one document."""
        return self._coll.update_one(filter, update, **kwargs)

    def update_many(
        self, filter: dict[str, Any], update: dict[str, Any], **kwargs: Any
    ) -> UpdateResult:
        """Update several documents."""
        return self._coll.update_many(filter, update, **kwargs)

    def delete_one(self, filter: dict[str, Any], **kwargs: Any) -> DeleteResult:
        """Delete one document."""
        return self._coll.delete_one(filter, **kwargs)

    def delete_many(self, filter: dict[str, Any], **kwargs: Any) -> DeleteResult:
        """Delete several documents."""
        return self._coll.delete_many(filter, **kwargs)

    def aggregate(self, pipeline: list[dict[str, Any]], **kwargs: Any) -> Any:
        """Run an aggregation pipeline."""
        return self._coll.aggregate(pipeline, **kwargs)
@@ -0,0 +1,193 @@
1
+ # 本地存储基类 - 约定存储位置与规则,子类继承后按约束执行
2
+
3
+ from abc import ABC, abstractmethod
4
+ import re
5
+ import threading
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from pymongo.collection import Collection
10
+ from pymongo.results import InsertOneResult, UpdateResult
11
+
12
+ from .metadata_storage import MetadataStore
13
+
14
# Root directory under which each storage sub-directory is created.
DOCUMENTS_DIR = "/data/documents"

# Characters allowed in a relative path: ASCII letters, digits, underscore,
# hyphen, dot and slash. The ".." rejection is not expressed by this pattern;
# it is a separate check in StorageBase._check_relative_path.
_RELATIVE_PATH_PATTERN = re.compile(r"^[a-zA-Z0-9_.\-/]+$")
18
+
19
+
20
class StorageBase(ABC):
    """
    Top-level base class for local file storage.

    Conventions:
    1. Location: the root is DOCUMENTS_DIR; each subclass names its own
       sub-directory through ``storage_subdir``.
    2. Rules: every path is relative to ``<root>/<storage_subdir>``; path
       traversal (".."), absolute paths and characters outside the allowed
       set are rejected.
    3. Subclasses must obtain target paths through
       ``self._resolve(relative_path)`` instead of writing files while
       bypassing these checks.
    4. Subclasses must implement ``storage_subdir``.
    5. The base class creates a MetadataStore for METADATA_COLLECTION;
       subclasses reach it through ``metadata_collection`` or
       ``_metadata_repo``.
    """

    @property
    @abstractmethod
    def storage_subdir(self) -> str:
        """Subclasses must implement: name of the storage sub-directory (non-empty)."""
        ...

    def __init__(
        self,
        *,
        mongodb_url: str,
        db_name: str,
        auth_source: Optional[str] = None,
    ) -> None:
        """
        Create the storage root directory, then the metadata store.

        :raises ValueError: if ``storage_subdir`` is blank, or if the
            connection parameters are rejected by MetadataStore
        """
        self._root = self._init_root()
        self._metadata_repo = MetadataStore(
            mongodb_url=mongodb_url,
            db_name=db_name,
            auth_source=auth_source,
        )

    @property
    def metadata_collection(self) -> Collection:
        """The metadata collection used for document-metadata CRUD."""
        return self._metadata_repo.collection

    def _init_root(self) -> Path:
        """
        Compute ``DOCUMENTS_DIR/<storage_subdir>``, create it if missing and
        return it.

        :raises ValueError: if ``storage_subdir`` is empty or blank
        """
        if not (self.storage_subdir and self.storage_subdir.strip()):
            raise ValueError("子类必须指定非空的 storage_subdir")
        root = Path(DOCUMENTS_DIR).resolve()
        root = root / self.storage_subdir.strip()
        root.mkdir(parents=True, exist_ok=True)
        return root

    def _resolve(self, relative_path: str) -> Path:
        """
        Turn a relative path into an absolute one and enforce the storage rules.

        Rules:
        - The path must stay inside the storage root (``_root``); ".." and
          absolute paths are rejected.
        - Only letters, digits, underscore, hyphen, dot and slash are allowed
          (subclasses may override ``_check_relative_path`` to change this).

        :param relative_path: path relative to the storage root, e.g.
            "20260228_013509_pubmed/main.py"
        :return: the resolved absolute path
        :raises ValueError: if the path is illegal (escapes the root or
            violates the naming rules)
        """
        relative_path = relative_path.replace("\\", "/").strip().lstrip("/")
        self._check_relative_path(relative_path)
        full = (self._root / relative_path).resolve()
        try:
            # After resolve() (which follows symlinks) the result must still
            # be located under the root.
            full.relative_to(self._root.resolve())
        except ValueError:
            raise ValueError("路径不允许超出存储根目录") from None
        return full

    def _check_relative_path(self, relative_path: str) -> None:
        """
        Validate a relative path against the storage rules. Subclasses may
        override this to customise the rules.

        Default: rejects an empty path, any occurrence of "..", an
        absolute-looking path, and characters outside the allowed set.

        :raises ValueError: if any rule is violated
        """
        if not relative_path:
            raise ValueError("相对路径不能为空")
        if ".." in relative_path:
            raise ValueError("相对路径不允许包含 ..")
        if Path(relative_path).is_absolute() or relative_path.startswith("/"):
            raise ValueError("相对路径不能为绝对路径")
        if not _RELATIVE_PATH_PATTERN.match(relative_path):
            raise ValueError("相对路径仅允许字母、数字、下划线、短横线、点和斜杠")

    def get_root(self) -> Path:
        """Return the absolute path of this storage's root directory."""
        return self._root

    def resolve_path(self, relative_path: str) -> Path:
        """
        Public entry point: resolve a relative path to an absolute one while
        enforcing the storage rules.

        Subclasses should use this method (or ``_resolve``) to obtain the
        target path when implementing reads and writes.
        """
        return self._resolve(relative_path)

    # The return annotation is quoted on purpose: ``A | B`` between classes is
    # evaluated when the function is defined and raises TypeError on
    # Python 3.9, which this package declares as supported.
    def write_metadata(
        self,
        *,
        relative_path: str,
        size: Optional[int] = None,
        content_hash: Optional[str] = None,
        md5: Optional[str] = None,
        phash: Optional[str] = None,
        mime_type: Optional[str] = None,
        status: Optional[str] = None,
        metadata: Optional[dict[str, object]] = None,
    ) -> "InsertOneResult | UpdateResult":
        """
        Save one file-metadata document keyed by storage_subdir + relative_path.

        ``ticket_id`` and ``task_id`` are read from attributes of the current
        thread object. If ``ticket_id`` is missing and the thread name starts
        with "collection-", the remainder of the name is used as ``ticket_id``.

        :raises ValueError: if ``ticket_id`` or ``task_id`` is still missing,
            or if ``relative_path`` violates the storage rules
        """
        thread = threading.current_thread()
        ticket_id = getattr(thread, "ticket_id", None)
        task_id = getattr(thread, "task_id", None)
        if (not ticket_id or not str(ticket_id).strip()) and (
            thread.name and thread.name.startswith("collection-")
        ):
            fallback_id = thread.name.split("collection-", 1)[1].strip()
            if fallback_id:
                ticket_id = fallback_id
                # NOTE(review): task_id falls back to ticket_id only when the
                # ticket id was recovered from the thread name — confirm
                # against the upstream source.
                if not task_id or not str(task_id).strip():
                    task_id = ticket_id
        if not ticket_id or not str(ticket_id).strip():
            raise ValueError("线程上下文缺少 ticket_id")
        if not task_id or not str(task_id).strip():
            raise ValueError("线程上下文缺少 task_id")

        normalized = relative_path.replace("\\", "/").strip().lstrip("/")
        self._check_relative_path(normalized)
        return self._metadata_repo.save(
            storage_subdir=self.storage_subdir,
            relative_path=normalized,
            size=size,
            content_hash=content_hash,
            md5=md5,
            phash=phash,
            mime_type=mime_type,
            ticket_id=ticket_id,
            task_id=task_id,
            status=status,
            metadata=metadata,
        )

    def _ensure_metadata_exists(self, relative_path: str) -> None:
        """
        Check, before a file is written, that its metadata document exists.

        :raises ValueError: if the path is illegal or no metadata is found
        """
        normalized = relative_path.replace("\\", "/").strip().lstrip("/")
        self._check_relative_path(normalized)
        doc = self._metadata_repo.find_by_path(self.storage_subdir, normalized)
        if not doc:
            raise ValueError(f"未找到对应元数据,禁止写入文件: {normalized}")

    def write(self, relative_path: str, data: bytes) -> Path:
        """
        Write ``data`` to ``relative_path``.

        The base class first verifies that the matching metadata exists and
        then delegates the actual write to the subclass.

        :param relative_path: path relative to the storage root
        :param data: bytes to write
        :return: absolute path of the written file
        :raises ValueError: if the path is illegal or metadata is missing
        """
        self._ensure_metadata_exists(relative_path)
        return self._write_impl(relative_path, data)

    @abstractmethod
    def _write_impl(self, relative_path: str, data: bytes) -> Path:
        """Subclass hook: perform the actual write (``write`` has already checked the metadata)."""
        ...

    def exists(self, relative_path: str) -> bool:
        """
        Tell whether the file or directory at ``relative_path`` exists.

        An illegal path yields ``False`` instead of raising. Subclasses may
        use this as is or override it.
        """
        try:
            return self._resolve(relative_path).exists()
        except ValueError:
            return False
@@ -0,0 +1,59 @@
1
+ Metadata-Version: 2.4
2
+ Name: nutricare-data-packages
3
+ Version: 0.1.0
4
+ Summary: Nutricare data access package
5
+ Author: Nutricare Team
6
+ Project-URL: Homepage, https://example.com
7
+ Project-URL: Repository, https://example.com/nutricare-data-packages
8
+ Requires-Python: >=3.9
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: pymongo>=4.0.0
11
+
12
+ # nutricare-data-packages
13
+
14
+ Nutricare 的数据访问基础包,封装 MongoDB 仓储与本地存储抽象基类。
15
+
16
+ ## 安装
17
+
18
+ ```bash
19
+ pip install nutricare-data-packages
20
+ ```
21
+
22
+ ## 快速使用
23
+
24
+ ```python
25
+ from pathlib import Path
26
+
27
+ from nutricare_data_packages import MetadataStore, StorageBase
28
+
29
+ store = MetadataStore(
30
+ mongodb_url="mongodb://localhost:27017",
31
+ db_name="nutricare",
32
+ auth_source="admin",
33
+ )
34
+
35
+ class ReportStorage(StorageBase):
36
+ @property
37
+ def storage_subdir(self) -> str:
38
+ return "reports"
39
+
40
+ def _write_impl(self, relative_path: str, data: bytes) -> Path:
41
+ target = self.resolve_path(relative_path)
42
+ target.parent.mkdir(parents=True, exist_ok=True)
43
+ target.write_bytes(data)
44
+ return target
45
+ ```
46
+
47
+ ## 本地构建与发布
48
+
49
+ ```bash
50
+ python -m pip install --upgrade build twine
51
+ python -m build
52
+ python -m twine check dist/*
53
+ python -m twine upload dist/*
54
+ ```
55
+
56
+ ## 说明
57
+
58
+ - 包名(pip 安装名):`nutricare-data-packages`
59
+ - 导入名(Python import):`nutricare_data_packages`
@@ -0,0 +1,11 @@
1
+ README.md
2
+ pyproject.toml
3
+ nutricare_data_packages/__init__.py
4
+ nutricare_data_packages/metadata_storage.py
5
+ nutricare_data_packages/mongo_repository_base.py
6
+ nutricare_data_packages/storage_base.py
7
+ nutricare_data_packages.egg-info/PKG-INFO
8
+ nutricare_data_packages.egg-info/SOURCES.txt
9
+ nutricare_data_packages.egg-info/dependency_links.txt
10
+ nutricare_data_packages.egg-info/requires.txt
11
+ nutricare_data_packages.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ nutricare_data_packages
@@ -0,0 +1,27 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "nutricare-data-packages"
7
+ version = "0.1.0"
8
+ description = "Nutricare data access package"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ authors = [
12
+ { name = "Nutricare Team" }
13
+ ]
14
+ dependencies = [
15
+ "pymongo>=4.0.0"
16
+ ]
17
+
18
+ [project.urls]
19
+ Homepage = "https://example.com"
20
+ Repository = "https://example.com/nutricare-data-packages"
21
+
22
+ [tool.setuptools]
23
+ include-package-data = true
24
+
25
+ [tool.setuptools.packages.find]
26
+ where = ["."]
27
+ include = ["nutricare_data_packages*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+