@aspan-corporation/ac-shared 1.2.30 → 1.2.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/utils/diary.d.ts +57 -0
- package/lib/utils/diary.js +148 -0
- package/lib/utils/index.d.ts +1 -0
- package/lib/utils/index.js +1 -0
- package/package.json +5 -1
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Diary entries are Markdown files stored in a dedicated diary bucket at keys
|
|
3
|
+
* `diary/YYYY/MM/YYYYMMDD.md`. They are indexed in the shared metadata table so
|
|
4
|
+
* they are searchable through the same interface as photos. Body text is made
|
|
5
|
+
* searchable by tokenising it into `ac:text:<word>` tags that ride the existing
|
|
6
|
+
* inverted-index search table.
|
|
7
|
+
*/
|
|
8
|
+
/** Key prefix for every diary object (and the `folder`/id namespace). */
|
|
9
|
+
export declare const DIARY_PREFIX = "diary/";
|
|
10
|
+
/** Marks a meta item as a diary entry (value "true"). */
|
|
11
|
+
export declare const TAG_DIARY_ENTRY = "ac:diary:entry";
|
|
12
|
+
/** Human title of the entry. */
|
|
13
|
+
export declare const TAG_DIARY_TITLE = "ac:diary:title";
|
|
14
|
+
/** Short body preview for card display (first ~160 chars, no markdown). */
|
|
15
|
+
export declare const TAG_DIARY_PREVIEW = "ac:diary:preview";
|
|
16
|
+
/** One per embedded photo: value is the referenced media key. */
|
|
17
|
+
export declare const TAG_DIARY_PHOTO = "ac:diary:photo";
|
|
18
|
+
/** Namespace for body word tokens: `ac:text:<word>`. */
|
|
19
|
+
export declare const TEXT_TOKEN_PREFIX = "ac:text:";
|
|
20
|
+
/** Max body characters surfaced in the preview tag. */
|
|
21
|
+
export declare const DIARY_PREVIEW_LENGTH = 160;
|
|
22
|
+
/** Upper bound on distinct word tokens written per entry (bounds item size). */
|
|
23
|
+
export declare const MAX_TEXT_TOKENS = 400;
|
|
24
|
+
/**
|
|
25
|
+
* Compute the diary object key for a date.
|
|
26
|
+
* diaryKey(new Date()) → today's key in the caller's LOCAL timezone
|
|
27
|
+
* diaryKey("2026-06-15") → "diary/2026/06/20260615.md" (literal Y-M-D)
|
|
28
|
+
*
|
|
29
|
+
* A `Date` is read with LOCAL components so "today" is the caller's wall-clock
|
|
30
|
+
* day (not the UTC day, which is a day ahead for users west of UTC in the
|
|
31
|
+
* evening). A `YYYY-MM-DD` string (e.g. from <input type="date">) is taken
|
|
32
|
+
* literally so the date round-trips exactly with no timezone shift.
|
|
33
|
+
*/
|
|
34
|
+
export declare const diaryKey: (date: Date | string) => string;
|
|
35
|
+
/** True when an id/key is a diary entry object. */
|
|
36
|
+
export declare const isDiaryKey: (key: string) => boolean;
|
|
37
|
+
/**
|
|
38
|
+
* Parse the date out of a diary key (the inverse of `diaryKey`).
|
|
39
|
+
* "diary/2026/06/20260615.md" → { year: 2026, month: 6, day: 15 }
|
|
40
|
+
* Returns undefined when the key isn't a recognised diary key.
|
|
41
|
+
*/
|
|
42
|
+
export declare const parseDiaryKeyDate: (key: string) => {
|
|
43
|
+
year: number;
|
|
44
|
+
month: number;
|
|
45
|
+
day: number;
|
|
46
|
+
} | undefined;
|
|
47
|
+
/**
|
|
48
|
+
* Tokenise diary markdown into a deduplicated set of searchable lowercase
|
|
49
|
+
* words. Strips markdown syntax (code fences, image/link targets, formatting),
|
|
50
|
+
* drops stop-words and tokens shorter than 2 chars, and caps the result at
|
|
51
|
+
* `MAX_TEXT_TOKENS` to bound the meta-item size. No stemming (exact-word match).
|
|
52
|
+
*/
|
|
53
|
+
export declare const tokenizeText: (markdown: string) => string[];
|
|
54
|
+
/** Plain-text preview: strip markdown, collapse whitespace, truncate. */
|
|
55
|
+
export declare const diaryPreview: (markdown: string) => string;
|
|
56
|
+
/** Extract the media keys of photos embedded as `![alt](key)` in the body. */
|
|
57
|
+
export declare const extractEmbeddedPhotoKeys: (markdown: string) => string[];
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Diary entries are Markdown files stored in a dedicated diary bucket at keys
|
|
3
|
+
* `diary/YYYY/MM/YYYYMMDD.md`. They are indexed in the shared metadata table so
|
|
4
|
+
* they are searchable through the same interface as photos. Body text is made
|
|
5
|
+
* searchable by tokenising it into `ac:text:<word>` tags that ride the existing
|
|
6
|
+
* inverted-index search table.
|
|
7
|
+
*/
|
|
8
|
+
/** Key prefix for every diary object (and the `folder`/id namespace). */
|
|
9
|
+
export const DIARY_PREFIX = "diary/";
|
|
10
|
+
/** Marks a meta item as a diary entry (value "true"). */
|
|
11
|
+
export const TAG_DIARY_ENTRY = "ac:diary:entry";
|
|
12
|
+
/** Human title of the entry. */
|
|
13
|
+
export const TAG_DIARY_TITLE = "ac:diary:title";
|
|
14
|
+
/** Short body preview for card display (first ~160 chars, no markdown). */
|
|
15
|
+
export const TAG_DIARY_PREVIEW = "ac:diary:preview";
|
|
16
|
+
/** One per embedded photo: value is the referenced media key. */
|
|
17
|
+
export const TAG_DIARY_PHOTO = "ac:diary:photo";
|
|
18
|
+
/** Namespace for body word tokens: `ac:text:<word>`. */
|
|
19
|
+
export const TEXT_TOKEN_PREFIX = "ac:text:";
|
|
20
|
+
/** Max body characters surfaced in the preview tag. */
|
|
21
|
+
export const DIARY_PREVIEW_LENGTH = 160;
|
|
22
|
+
/** Upper bound on distinct word tokens written per entry (bounds item size). */
|
|
23
|
+
export const MAX_TEXT_TOKENS = 400;
|
|
24
|
+
// Render a value as a string, left-padded with "0" to at least two characters.
const pad2 = (n) => {
    const digits = String(n);
    return digits.padStart(2, "0");
};
|
|
25
|
+
/**
 * Compute the diary object key for a date.
 *   diaryKey(new Date())   → today's key in the caller's LOCAL timezone
 *   diaryKey("2026-06-15") → "diary/2026/06/20260615.md"  (literal Y-M-D)
 *
 * A `Date` is read with LOCAL components so "today" is the caller's wall-clock
 * day rather than the UTC day. A string that starts with `YYYY-MM-DD` (e.g. the
 * value of <input type="date">) is taken literally so the date round-trips
 * exactly with no timezone shift. Any other string is parsed with `new Date()`
 * and read with local components.
 *
 * The year is always written with at least four digits so the key keeps the
 * `diary/YYYY/MM/YYYYMMDD.md` shape that `parseDiaryKeyDate` expects.
 *
 * @param date A `Date`, or a date string.
 * @returns The diary object key for that day.
 * @throws {Error} When the `Date` is invalid, the string cannot be parsed, or a
 *   literal `YYYY-MM-DD` names a day that does not exist (e.g. "2026-02-30").
 */
export const diaryKey = (date) => {
    let y;
    let m;
    let day;
    const lit = typeof date === "string"
        ? /^(\d{4})-(\d{2})-(\d{2})/.exec(date)
        : null;
    if (lit) {
        y = Number(lit[1]);
        m = Number(lit[2]);
        day = Number(lit[3]);
        // A literal is never passed through Date, so validate it by hand;
        // otherwise "2026-13-45" would silently become a bogus key.
        const leap = (y % 4 === 0 && y % 100 !== 0) || y % 400 === 0;
        const daysInMonth = m === 2
            ? (leap ? 29 : 28)
            : [4, 6, 9, 11].includes(m) ? 30 : 31;
        if (m < 1 || m > 12 || day < 1 || day > daysInMonth)
            throw new Error(`diaryKey: invalid date "${date}"`);
    }
    else {
        const isText = typeof date === "string";
        const d = isText ? new Date(date) : date;
        if (isNaN(d.getTime()))
            throw new Error(isText
                ? `diaryKey: invalid date "${date}"`
                : "diaryKey: invalid Date");
        y = d.getFullYear();
        m = d.getMonth() + 1;
        day = d.getDate();
    }
    // Zero-pad the year so early years still produce a four-digit component.
    const yyyy = String(y).padStart(4, "0");
    return `${DIARY_PREFIX}${yyyy}/${pad2(m)}/${yyyy}${pad2(m)}${pad2(day)}.md`;
};
|
|
64
|
+
/** True when an id/key is a diary entry object. */
|
|
65
|
+
export const isDiaryKey = (key) => {
    // Both conditions must hold: the diary namespace prefix and a Markdown suffix.
    if (!key.startsWith(DIARY_PREFIX))
        return false;
    return key.endsWith(".md");
};
|
|
66
|
+
/**
 * Parse the date out of a diary key (the inverse of `diaryKey`).
 *   "diary/2026/06/20260615.md" → { year: 2026, month: 6, day: 15 }
 *
 * The key must end in `diary/YYYY/MM/YYYYMMDD.md`, and the year/month in the
 * directory part must repeat exactly in the file name. Keys whose parts
 * disagree, or whose month/day fall outside 1-12 / 1-31, are rejected.
 *
 * @param key Object key or id to inspect.
 * @returns `{ year, month, day }` as numbers, or `undefined` when the key isn't
 *   a recognised diary key.
 */
export const parseDiaryKeyDate = (key) => {
    // Back-references \1 and \2 force the file name to repeat the directory's
    // year and month, so "diary/2026/06/20250101.md" does not match.
    const m = /diary\/(\d{4})\/(\d{2})\/\1\2(\d{2})\.md$/.exec(key);
    if (!m)
        return undefined;
    const year = Number(m[1]);
    const month = Number(m[2]);
    const day = Number(m[3]);
    if (month < 1 || month > 12 || day < 1 || day > 31)
        return undefined;
    return { year, month, day };
};
|
|
77
|
+
// A small English stop-word set — high-frequency words that add noise and bloat
// the index without improving recall for a personal diary. Entries are lowercase
// because the tokeniser lowercases text before the lookup. Each word is listed
// once.
const STOP_WORDS = new Set([
    "the", "and", "for", "are", "but", "not", "you", "all", "any", "can", "had",
    "her", "was", "one", "our", "out", "day", "get", "has", "him", "his", "how",
    "its", "may", "new", "now", "old", "see", "two", "way", "who", "did", "yes",
    "she", "they", "them", "this", "that", "with", "have", "from", "your",
    "were", "been", "their", "what", "when", "then", "than", "into", "just",
    "like", "over", "also", "back", "after", "would", "could", "there", "here",
    "about", "which", "while", "these", "those", "where", "very", "much", "some",
    "such", "only", "more", "most", "will", "well", "went", "going", "got",
]);
|
|
89
|
+
/**
 * Tokenise diary markdown into a deduplicated list of searchable lowercase
 * words, in order of first appearance.
 *
 * Steps: remove fenced and inline code, reduce images/links to their visible
 * text, normalise to NFC, lowercase, then split on every run of characters that
 * is not a Unicode letter, combining mark or number. Splitting is Unicode-aware
 * so non-Latin and accented text is indexed as whole words; ASCII input
 * tokenises exactly as it would with an `[^a-z0-9]` split.
 *
 * Dropped: tokens shorter than 2 characters, purely numeric tokens, and
 * stop-words. The result is capped at `MAX_TEXT_TOKENS` to bound the meta-item
 * size. No stemming (exact-word match), and scripts written without spaces are
 * not segmented into words.
 *
 * @param markdown Raw diary markdown; empty/undefined yields `[]`.
 * @returns Distinct lowercase tokens.
 */
export const tokenizeText = (markdown) => {
    if (!markdown)
        return [];
    const stripped = markdown
        // fenced + inline code
        .replace(/```[\s\S]*?```/g, " ")
        .replace(/`[^`]*`/g, " ")
        // image / link targets: keep the visible text, drop the URL/key
        .replace(/!\[([^\]]*)\]\([^)]*\)/g, " $1 ")
        .replace(/\[([^\]]*)\]\([^)]*\)/g, " $1 ");
    // Remaining markdown punctuation (#, >, *, _, ~, -) is neither a letter,
    // mark nor number, so the split below discards it without a separate pass.
    const separators = /[^\p{L}\p{M}\p{N}]+/u;
    const numericOnly = /^\p{N}+$/u;
    const seen = new Set();
    for (const raw of stripped.normalize("NFC").toLowerCase().split(separators)) {
        if (raw.length < 2)
            continue;
        // Drop pure numbers (dates/counts add noise); keep alphanumerics like "v2".
        if (numericOnly.test(raw))
            continue;
        if (STOP_WORDS.has(raw))
            continue;
        seen.add(raw);
        if (seen.size >= MAX_TEXT_TOKENS)
            break;
    }
    return [...seen];
};
|
|
122
|
+
/**
 * Plain-text preview: strip markdown, collapse whitespace, truncate.
 *
 * Fenced and inline code and images are removed, links are reduced to their
 * visible text, and `#`, `>`, `*`, `_`, `~` runs are blanked. Text longer than
 * `DIARY_PREVIEW_LENGTH` UTF-16 code units is cut there and suffixed with "…".
 *
 * @param markdown Raw diary markdown; empty/undefined yields "".
 * @returns The preview string.
 */
export const diaryPreview = (markdown) => {
    // Same empty-input guard as tokenizeText, instead of throwing on undefined.
    if (!markdown)
        return "";
    const text = markdown
        .replace(/```[\s\S]*?```/g, " ")
        .replace(/`[^`]*`/g, " ")
        .replace(/!\[([^\]]*)\]\([^)]*\)/g, " ")
        .replace(/\[([^\]]*)\]\([^)]*\)/g, " $1 ")
        .replace(/[#>*_~]+/g, " ")
        .replace(/\s+/g, " ")
        .trim();
    if (text.length <= DIARY_PREVIEW_LENGTH)
        return text;
    let end = DIARY_PREVIEW_LENGTH;
    // If the cut lands between the two halves of a surrogate pair (e.g. an
    // emoji), back up one unit so no lone high surrogate is emitted.
    const lastUnit = text.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
        end -= 1;
    return text.slice(0, end).trimEnd() + "…";
};
|
|
136
|
+
/**
 * Extract the media keys of photos embedded as `![alt](key)` in the body.
 *
 * An optional quoted title (`![alt](key "title")`) and `<…>` wrapping around
 * the destination are removed so only the key itself is returned. External
 * images (`http://`, `https://` or protocol-relative `//` URLs) are ignored.
 *
 * @param markdown Raw diary markdown.
 * @returns Distinct media keys in order of first appearance.
 */
export const extractEmbeddedPhotoKeys = (markdown) => {
    const keys = new Set();
    const re = /!\[[^\]]*\]\(([^)]+)\)/g;
    let m;
    while ((m = re.exec(markdown)) !== null) {
        const key = m[1]
            .trim()
            // Drop a trailing "title" / 'title' so it doesn't leak into the key.
            .replace(/\s+(?:"[^"]*"|'[^']*')$/, "")
            // Unwrap the <destination> form.
            .replace(/^<(.*)>$/, "$1")
            .trim();
        // Only treat library media keys as photo links (ignore external URLs).
        if (key && !/^(?:https?:)?\/\//i.test(key))
            keys.add(key);
    }
    return [...keys];
};
|
package/lib/utils/index.d.ts
CHANGED
package/lib/utils/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aspan-corporation/ac-shared",
|
|
3
|
-
"version": "1.2.30",
|
|
3
|
+
"version": "1.2.32",
|
|
4
4
|
"description": "",
|
|
5
5
|
"keywords": [],
|
|
6
6
|
"exports": {
|
|
@@ -23,6 +23,10 @@
|
|
|
23
23
|
"./utils/thumbsKey": {
|
|
24
24
|
"types": "./lib/utils/thumbsKey.d.ts",
|
|
25
25
|
"import": "./lib/utils/thumbsKey.js"
|
|
26
|
+
},
|
|
27
|
+
"./utils/diary": {
|
|
28
|
+
"types": "./lib/utils/diary.d.ts",
|
|
29
|
+
"import": "./lib/utils/diary.js"
|
|
26
30
|
}
|
|
27
31
|
},
|
|
28
32
|
"author": "",
|