hazo_files 2.0.1 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGE_LOG.md +19 -0
- package/README.md +111 -0
- package/dist/index.d.mts +96 -9
- package/dist/index.d.ts +96 -9
- package/dist/index.js +113 -2
- package/dist/index.mjs +107 -1
- package/package.json +1 -1
package/CHANGE_LOG.md
CHANGED
|
@@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## 2.1.1 — 2026-05-24
|
|
9
|
+
|
|
10
|
+
**Docs + housekeeping** — no API changes.
|
|
11
|
+
|
|
12
|
+
- README: added documentation for `NamingTemplate`, `writeWithCollisionRetry`, and the pure `addRef`/`removeRef`/`countRefs` helpers introduced in 2.1.0.
|
|
13
|
+
- `.gitignore`: now ignores test-app runtime artifacts (`data/`, `logs/`, `tsconfig.tsbuildinfo`, `*.tgz`) so they are no longer tracked.
|
|
14
|
+
|
|
15
|
+
## 2.1.0 — 2026-05-24
|
|
16
|
+
|
|
17
|
+
**Additive** — no breaking changes from 2.0.x.
|
|
18
|
+
|
|
19
|
+
- `NamingTemplate` DSL with formatters (`slug`, `pad:N`, `upper`, `lower`, `truncate:N`, `date:FMT`). 200-char cap; sanitises `/\:*?"<>|` → `_`; throws on missing variable; dotted-path resolution. Formatters `pad` and `truncate` throw on missing/NaN arg; `date` throws on empty value.
|
|
20
|
+
- `writeWithCollisionRetry` helper with 999-attempt cap; throws `StorageCollisionExhausted`. Renders the `{index}` variable per attempt.
|
|
21
|
+
- Reference tracking helpers: `addRef` / `removeRef` / `countRefs` over `{ file_refs, ref_count }`. Idempotent on same `ref_id`; soft-tombstone via `removed_at`.
|
|
22
|
+
|
|
23
|
+
### Migration from 2.0.x
|
|
24
|
+
|
|
25
|
+
No required changes. Existing imports continue to work. Use the new helpers to compute filenames and track usage references.
|
|
26
|
+
|
|
8
27
|
## 2.0.1 (2026-05-23)
|
|
9
28
|
|
|
10
29
|
### Added
|
package/README.md
CHANGED
|
@@ -1564,6 +1564,117 @@ import type {
|
|
|
1564
1564
|
} from 'hazo_files';
|
|
1565
1565
|
```
|
|
1566
1566
|
|
|
1567
|
+
### Pure Reference Helpers (v2.1.0)
|
|
1568
|
+
|
|
1569
|
+
Functional helpers for manipulating `{ file_refs, ref_count }` objects without a database. Useful for in-memory state management or building your own persistence layer.
|
|
1570
|
+
|
|
1571
|
+
```typescript
|
|
1572
|
+
import { addRef, removeRef, countRefs } from 'hazo_files';
|
|
1573
|
+
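import type { FileWithRefs, FileRef } from 'hazo_files'; // note: the exported `FileRef` is the pre-existing tracked-file reference interface; the helpers' own ref interface (`ref_source`, `created_at`, `removed_at`) is declared separately and is not exported under this name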
|
|
1574
|
+
|
|
1575
|
+
// addRef — idempotent on same ref_id; returns updated object
|
|
1576
|
+
const updated = addRef(file, {
|
|
1577
|
+
ref_id: 'ref-123',
|
|
1578
|
+
ref_type: 'form_field',
|
|
1579
|
+
ref_source: 'form-abc',
|
|
1580
|
+
created_at: new Date().toISOString(),
|
|
1581
|
+
});
|
|
1582
|
+
// updated.ref_count === countRefs(updated)
|
|
1583
|
+
|
|
1584
|
+
// removeRef — soft-tombstones by setting removed_at; returns updated object
|
|
1585
|
+
const released = removeRef(file, 'ref-123', new Date().toISOString());
|
|
1586
|
+
// released.ref_count is decremented; the 'ref-123' entry stays in file_refs with removed_at set (soft tombstone)
|
|
1587
|
+
|
|
1588
|
+
// countRefs — count active (non-tombstoned) references
|
|
1589
|
+
const active = countRefs(file); // number
|
|
1590
|
+
```
|
|
1591
|
+
|
|
1592
|
+
These helpers are **pure functions** — they never write to a database. Pair them with `TrackedFileManager.addRef` / `removeRef` when you need DB persistence.
|
|
1593
|
+
|
|
1594
|
+
## Naming Templates (v2.1.0)
|
|
1595
|
+
|
|
1596
|
+
`NamingTemplate` is a lightweight DSL for generating filenames and folder paths from string templates with variable substitution and pipe formatters.
|
|
1597
|
+
|
|
1598
|
+
### Basic Usage
|
|
1599
|
+
|
|
1600
|
+
```typescript
|
|
1601
|
+
import { NamingTemplate } from 'hazo_files';
|
|
1602
|
+
|
|
1603
|
+
const name = NamingTemplate.render(
|
|
1604
|
+
'{client_name|slug}_{date|date:YYYY-MM-DD}_{index|pad:3}.pdf',
|
|
1605
|
+
{ client_name: 'Acme Corp', date: '2026-05-24T00:00:00Z', index: 7 }
|
|
1606
|
+
);
|
|
1607
|
+
// → "acme-corp_2026-05-24_007.pdf"
|
|
1608
|
+
```
|
|
1609
|
+
|
|
1610
|
+
### Template Syntax
|
|
1611
|
+
|
|
1612
|
+
- **`{variable}`** — substitutes `variable` from the vars object
|
|
1613
|
+
- **`{variable|formatter}`** — applies a formatter after substitution
|
|
1614
|
+
- **`{variable|formatter:arg}`** — formatter with argument
|
|
1615
|
+
- **`{obj.nested.key}`** — dotted-path resolution into nested objects
|
|
1616
|
+
- Output is capped at **200 characters**; `/\:*?"<>|` are sanitised to `_`
|
|
1617
|
+
|
|
1618
|
+
### Formatters
|
|
1619
|
+
|
|
1620
|
+
| Formatter | Example | Description |
|
|
1621
|
+
|---|---|---|
|
|
1622
|
+
| `slug` | `{name\|slug}` | Lowercase, replace non-alphanumeric with `-`, strip leading/trailing `-` |
|
|
1623
|
+
| `upper` | `{code\|upper}` | Uppercase |
|
|
1624
|
+
| `lower` | `{code\|lower}` | Lowercase |
|
|
1625
|
+
| `pad:N` | `{index\|pad:3}` | Zero-pad to N digits (7 → `007`) |
|
|
1626
|
+
| `truncate:N` | `{title\|truncate:20}` | Trim to N characters |
|
|
1627
|
+
| `date:FMT` | `{ts\|date:YYYY-MM-DD}` | Format ISO date string; tokens: `YYYY`, `MM`, `DD`, `HH`, `mm`, `ss` |
|
|
1628
|
+
|
|
1629
|
+
### Error Conditions
|
|
1630
|
+
|
|
1631
|
+
- Missing variable → throws `Error: missing variable: <name>`
|
|
1632
|
+
- `pad` / `truncate` without numeric arg → throws
|
|
1633
|
+
- `date` formatter on empty/invalid value → throws
|
|
1634
|
+
- Unknown formatter → throws
|
|
1635
|
+
|
|
1636
|
+
## Collision-Safe Writes (v2.1.0)
|
|
1637
|
+
|
|
1638
|
+
`writeWithCollisionRetry` writes a file to any `FileStorageProvider`, automatically incrementing an `{index}` variable in the template until a free path is found.
|
|
1639
|
+
|
|
1640
|
+
### Usage
|
|
1641
|
+
|
|
1642
|
+
```typescript
|
|
1643
|
+
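import { writeWithCollisionRetry, StorageCollisionExhausted } from 'hazo_files'; // NOTE(review): StorageCollisionExhausted does not appear in the type declarations' export list for this release — confirm it is exported before relying on this import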
|
|
1644
|
+
|
|
1645
|
+
try {
|
|
1646
|
+
const result = await writeWithCollisionRetry({
|
|
1647
|
+
provider, // FileStorageProvider
|
|
1648
|
+
template: '{client|slug}_{date|date:YYYY-MM-DD}_{index|pad:3}.pdf',
|
|
1649
|
+
vars: { client: 'Acme Corp', date: new Date().toISOString() },
|
|
1650
|
+
body: pdfBuffer,
|
|
1651
|
+
path_prefix: 'uploads/forms', // optional — prepended to rendered name
|
|
1652
|
+
max_attempts: 999, // optional, default 999
|
|
1653
|
+
put_opts: { metadata: { ... } } // optional extra PutOpts (ifNotExists always set)
|
|
1654
|
+
});
|
|
1655
|
+
|
|
1656
|
+
console.log(result.logical_path);
|
|
1657
|
+
// → "uploads/forms/acme-corp_2026-05-24_001.pdf"
|
|
1658
|
+
// (or _002, _003, … if earlier slots were taken)
|
|
1659
|
+
} catch (err) {
|
|
1660
|
+
if (err instanceof StorageCollisionExhausted) {
|
|
1661
|
+
// All 999 attempts collided
|
|
1662
|
+
console.error(`Could not write after ${err.attempts} attempts`);
|
|
1663
|
+
}
|
|
1664
|
+
throw err;
|
|
1665
|
+
}
|
|
1666
|
+
```
|
|
1667
|
+
|
|
1668
|
+
### How It Works
|
|
1669
|
+
|
|
1670
|
+
1. Renders the template with `{ ...vars, index: 1 }` for the first attempt
|
|
1671
|
+
2. Calls `provider.put(path, body, { ifNotExists: true })`
|
|
1672
|
+
3. If the file exists, increments `index` and retries
|
|
1673
|
+
4. Returns `PutResult & { logical_path: string }` on the first successful write
|
|
1674
|
+
5. Throws `StorageCollisionExhausted` if all attempts are exhausted
|
|
1675
|
+
|
|
1676
|
+
The `{index}` variable is **automatically injected** — you don't need to supply it in `vars`.
|
|
1677
|
+
|
|
1567
1678
|
## File Change Detection
|
|
1568
1679
|
|
|
1569
1680
|
Detect file content changes using fast xxHash hashing.
|
package/dist/index.d.mts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
1
2
|
import { OAuth2Client } from 'google-auth-library';
|
|
2
3
|
|
|
3
4
|
/**
|
|
@@ -396,7 +397,7 @@ type FileRefVisibility = 'public' | 'private' | 'internal';
|
|
|
396
397
|
* A reference from an entity to a file.
|
|
397
398
|
* Multiple entities can reference the same file.
|
|
398
399
|
*/
|
|
399
|
-
interface FileRef {
|
|
400
|
+
interface FileRef$1 {
|
|
400
401
|
/** Unique ID for this reference */
|
|
401
402
|
ref_id: string;
|
|
402
403
|
/** Type of entity referencing the file (e.g., 'form_field', 'chat_message') */
|
|
@@ -480,7 +481,7 @@ interface FileWithStatus {
|
|
|
480
481
|
/** Full metadata record (V2) */
|
|
481
482
|
record: FileMetadataRecordV2;
|
|
482
483
|
/** Parsed file references */
|
|
483
|
-
refs: FileRef[];
|
|
484
|
+
refs: FileRef$1[];
|
|
484
485
|
/** Whether the file has zero references */
|
|
485
486
|
is_orphaned: boolean;
|
|
486
487
|
}
|
|
@@ -878,7 +879,7 @@ declare class FileMetadataService {
|
|
|
878
879
|
/**
|
|
879
880
|
* Get all references for a file
|
|
880
881
|
*/
|
|
881
|
-
getRefs(fileId: string): Promise<FileRef[] | null>;
|
|
882
|
+
getRefs(fileId: string): Promise<FileRef$1[] | null>;
|
|
882
883
|
/**
|
|
883
884
|
* Get a file with its status and parsed refs
|
|
884
885
|
*/
|
|
@@ -3256,24 +3257,24 @@ declare function generateRefId(): string;
|
|
|
3256
3257
|
* Parse a JSON string into FileRef array.
|
|
3257
3258
|
* Returns empty array on invalid input.
|
|
3258
3259
|
*/
|
|
3259
|
-
declare function parseFileRefs(json: string | null | undefined): FileRef[];
|
|
3260
|
+
declare function parseFileRefs(json: string | null | undefined): FileRef$1[];
|
|
3260
3261
|
/**
|
|
3261
3262
|
* Serialize FileRef array to JSON string
|
|
3262
3263
|
*/
|
|
3263
|
-
declare function stringifyFileRefs(refs: FileRef[]): string;
|
|
3264
|
+
declare function stringifyFileRefs(refs: FileRef$1[]): string;
|
|
3264
3265
|
/**
|
|
3265
3266
|
* Create a FileRef from AddRefOptions
|
|
3266
3267
|
*/
|
|
3267
|
-
declare function createFileRef(options: AddRefOptions): FileRef;
|
|
3268
|
+
declare function createFileRef(options: AddRefOptions): FileRef$1;
|
|
3268
3269
|
/**
|
|
3269
3270
|
* Remove a ref by ref_id (immutable)
|
|
3270
3271
|
*/
|
|
3271
|
-
declare function removeRefFromArray(refs: FileRef[], refId: string): FileRef[];
|
|
3272
|
+
declare function removeRefFromArray(refs: FileRef$1[], refId: string): FileRef$1[];
|
|
3272
3273
|
/**
|
|
3273
3274
|
* Remove refs matching criteria from array (immutable).
|
|
3274
3275
|
* All specified criteria fields must match (AND semantics).
|
|
3275
3276
|
*/
|
|
3276
|
-
declare function removeRefsByCriteriaFromArray(refs: FileRef[], criteria: Omit<RemoveRefsCriteria, 'file_id' | 'scope_id'>): FileRef[];
|
|
3277
|
+
declare function removeRefsByCriteriaFromArray(refs: FileRef$1[], criteria: Omit<RemoveRefsCriteria, 'file_id' | 'scope_id'>): FileRef$1[];
|
|
3277
3278
|
/**
|
|
3278
3279
|
* Safely cast a FileMetadataRecord to FileMetadataRecordV2.
|
|
3279
3280
|
* Missing V2 fields are defaulted.
|
|
@@ -3284,4 +3285,90 @@ declare function toV2Record(record: FileMetadataRecord): FileMetadataRecordV2;
|
|
|
3284
3285
|
*/
|
|
3285
3286
|
declare function buildFileWithStatus(record: FileMetadataRecord): FileWithStatus;
|
|
3286
3287
|
|
|
3287
|
-
|
|
3288
|
+
declare class NamingTemplate {
|
|
3289
|
+
static render(template: string, vars: Record<string, unknown>): string;
|
|
3290
|
+
}
|
|
3291
|
+
|
|
3292
|
+
type StoragePath = string;
|
|
3293
|
+
interface PutOpts {
|
|
3294
|
+
/** Reject if the target path already exists (atomic). */
|
|
3295
|
+
ifNotExists?: boolean;
|
|
3296
|
+
/** Hint for content-type; provider may sniff if absent. */
|
|
3297
|
+
contentType?: string;
|
|
3298
|
+
/** Free-form key/value metadata persisted with the file when supported. */
|
|
3299
|
+
metadata?: Record<string, string>;
|
|
3300
|
+
}
|
|
3301
|
+
interface PutResult {
|
|
3302
|
+
/** Provider tag — `"app_file_server"`, `"gdrive"`, `"in_memory"`. */
|
|
3303
|
+
provider: string;
|
|
3304
|
+
/** Provider-native identifier (path for app-server; file ID for GDrive). */
|
|
3305
|
+
native_id: string;
|
|
3306
|
+
/** Size in bytes of the persisted body. */
|
|
3307
|
+
size: number;
|
|
3308
|
+
}
|
|
3309
|
+
interface SignedUrlOpts {
|
|
3310
|
+
/** Seconds the URL is valid for. */
|
|
3311
|
+
ttl_seconds?: number;
|
|
3312
|
+
/** Suggested download filename (Content-Disposition). */
|
|
3313
|
+
filename_hint?: string;
|
|
3314
|
+
}
|
|
3315
|
+
interface ProbeResult {
|
|
3316
|
+
ok: boolean;
|
|
3317
|
+
/** Machine-readable error tag when ok=false. */
|
|
3318
|
+
error?: "drive_not_shared" | "write_denied" | "invalid_id" | "transient" | "config_missing";
|
|
3319
|
+
/** Free-form detail for logging. */
|
|
3320
|
+
message?: string;
|
|
3321
|
+
}
|
|
3322
|
+
/**
|
|
3323
|
+
* Storage provider abstraction. Every method MUST be idempotent at the
|
|
3324
|
+
* data-content level — re-invoking put with identical body is allowed.
|
|
3325
|
+
*
|
|
3326
|
+
* Paths are logical; providers translate to native identifiers internally.
|
|
3327
|
+
*/
|
|
3328
|
+
interface FileStorageProvider {
|
|
3329
|
+
put(path: StoragePath, body: Buffer | Readable, opts?: PutOpts): Promise<PutResult>;
|
|
3330
|
+
get(path: StoragePath): Promise<Buffer | Readable>;
|
|
3331
|
+
delete(path: StoragePath): Promise<void>;
|
|
3332
|
+
exists(path: StoragePath): Promise<boolean>;
|
|
3333
|
+
getSignedUrl(path: StoragePath, opts?: SignedUrlOpts): Promise<string>;
|
|
3334
|
+
/** Used by validation cron + onboarding step 2. */
|
|
3335
|
+
probe(): Promise<ProbeResult>;
|
|
3336
|
+
}
|
|
3337
|
+
|
|
3338
|
+
interface WriteWithCollisionOpts {
|
|
3339
|
+
provider: FileStorageProvider;
|
|
3340
|
+
template: string;
|
|
3341
|
+
vars: Record<string, unknown>;
|
|
3342
|
+
body: Buffer;
|
|
3343
|
+
/** Prefix to mount the rendered filename under, e.g. `"folders/abc123"`. */
|
|
3344
|
+
path_prefix?: string;
|
|
3345
|
+
/** Max attempts. Default 999. */
|
|
3346
|
+
max_attempts?: number;
|
|
3347
|
+
/** Extra opts passed to provider.put (ifNotExists is always forced true). */
|
|
3348
|
+
put_opts?: Omit<PutOpts, "ifNotExists">;
|
|
3349
|
+
}
|
|
3350
|
+
declare function writeWithCollisionRetry(opts: WriteWithCollisionOpts): Promise<PutResult & {
|
|
3351
|
+
logical_path: string;
|
|
3352
|
+
}>;
|
|
3353
|
+
|
|
3354
|
+
interface FileRef {
|
|
3355
|
+
ref_id: string;
|
|
3356
|
+
ref_type: string;
|
|
3357
|
+
ref_source: string;
|
|
3358
|
+
form_id?: string;
|
|
3359
|
+
field_id?: string;
|
|
3360
|
+
message_id?: string;
|
|
3361
|
+
document_id?: string;
|
|
3362
|
+
created_at: string;
|
|
3363
|
+
removed_at?: string;
|
|
3364
|
+
}
|
|
3365
|
+
interface FileWithRefs {
|
|
3366
|
+
file_refs: FileRef[];
|
|
3367
|
+
ref_count: number;
|
|
3368
|
+
}
|
|
3369
|
+
|
|
3370
|
+
declare function addRef<T extends FileWithRefs>(file: T, ref: FileRef): T;
|
|
3371
|
+
declare function removeRef<T extends FileWithRefs>(file: T, ref_id: string, removed_at: string): T;
|
|
3372
|
+
declare function countRefs(file: Pick<FileWithRefs, "file_refs">): number;
|
|
3373
|
+
|
|
3374
|
+
export { ALL_SYSTEM_VARIABLES, type AddExtractionOptions, type AddRefOptions, type AuthCallbacks, AuthenticationError, type CleanupOrphanedOptions, ConfigurationError, type ContentTagConfig, type CreateFolderOptions, type CrudServiceLike, DEFAULT_DATE_FORMATS, type DatabaseSchemaDefinition, type DatabaseTrackingConfig, DirectoryExistsError, DirectoryNotEmptyError, DirectoryNotFoundError, type DownloadOptions, DropboxAuth, type DropboxAuthCallbacks, type DropboxAuthConfig, type DropboxConfig, DropboxModule, type DropboxTokenData, type ExtractionData, type ExtractionOptions, type ExtractionResult, type FileBrowserState, type FileDataStructure, FileExistsError, type FileInfo, type FileItem, FileManager, type FileManagerOptions, type FileMetadataInput, type FileMetadataRecord, type FileMetadataRecordV2, FileMetadataService, type FileMetadataServiceOptions, type FileMetadataUpdate, FileNotFoundError, type FileRef$1 as FileRef, type FileRefVisibility, type FileStatus, type FileSystemItem, FileTooLargeError, type FileWithRefs, type FileWithStatus, type FindOrphanedOptions, type FolderItem, type GeneratedNameResult, type GoogleAuthConfig, GoogleDriveAuth, type GoogleDriveConfig, GoogleDriveModule, HAZO_FILES_DEFAULT_TABLE_NAME, HAZO_FILES_MIGRATION_V2, HAZO_FILES_MIGRATION_V3, HAZO_FILES_NAMING_DEFAULT_TABLE_NAME, HAZO_FILES_NAMING_TABLE_SCHEMA, HAZO_FILES_TABLE_SCHEMA, type HazoFilesColumnDefinitions, type HazoFilesConfig, HazoFilesError, type HazoFilesMigrationV2, type HazoFilesMigrationV3, type HazoFilesNamingColumnDefinitions, type HazoFilesNamingTableSchema, type HazoFilesTableSchema, type HazoLLMInstance, InvalidExtensionError, InvalidPathError, LLMExtractionService, type LLMFactory, type LLMFactoryConfig, type LLMProvider, type ListNamingConventionsOptions, type ListOptions, type LocalStorageConfig, LocalStorageModule, type MetadataLogger, type MigrationExecutor, type MigrationSchemaDefinition, type MoveOptions, type NameGenerationOptions, type 
NamingConventionInput, type NamingConventionRecord, NamingConventionService, type NamingConventionServiceOptions, type NamingConventionType, type NamingConventionUpdate, type NamingRuleConfiguratorProps, type NamingRuleHistoryEntry, type NamingRuleSchema, NamingTemplate, type NamingVariable, OperationError, type OperationResult, type ParsedNamingConvention, type PatternSegment, PermissionDeniedError, type ProgressCallback, type RemoveExtractionOptions, type RemoveRefsCriteria, type RenameOptions, SYSTEM_COUNTER_VARIABLES, SYSTEM_DATE_VARIABLES, SYSTEM_FILE_VARIABLES, type StorageModule, type StorageProvider, type TokenData, TrackedFileManager, type TrackedFileManagerFullOptions, type TrackedFileManagerOptions, type TrackedUploadOptions, type TreeNode, type UploadExtractOptions, type UploadExtractResult, UploadExtractService, type UploadOptions, type UploadWithRefOptions, type UseNamingRuleActions, type UseNamingRuleReturn, type UseNamingRuleState, type VariableCategory, type WriteWithCollisionOpts, addExtractionToFileData, addRef, backfillV2Defaults, buildFileWithStatus, clearExtractions, clonePattern, computeFileHash, computeFileHashFromStream, computeFileHashSync, computeFileInfo, countRefs, createAndInitializeModule, createDropboxAuth, createDropboxModule, createEmptyFileDataStructure, createEmptyNamingRuleSchema, createFileItem, createFileManager, createFileMetadataService, createFileRef, createFolderItem, createGoogleDriveAuth, createGoogleDriveModule, createInitializedFileManager, createInitializedTrackedFileManager, createLLMExtractionService, createLiteralSegment, createLocalModule, createModule, createNamingConventionService, createTrackedFileManager, createUploadExtractService, createVariableSegment, deepMerge, errorResult, filterItems, formatBytes, formatCounter, formatDateToken, generateExtractionId, generateId, generatePreviewName, generateRefId, generateSampleConfig, generateSegmentId, getBaseName, getBreadcrumbs, getDirName, getExtension, 
getExtensionFromMime, getExtractionById, getExtractionCount, getExtractions, getFileCategory, getFileMetadataValues, getMergedData, getMigrationForTable, getMigrationV3ForTable, getMimeType, getNameWithoutExtension, getNamingSchemaForTable, getParentPath, getPathSegments, getRegisteredProviders, getRelativePath, getSchemaForTable, getSystemVariablePreviewValues, hasExtension, hasExtractionStructure, hasFileContentChanged, hashesEqual, hazo_files_generate_file_name, hazo_files_generate_folder_name, isAudio, isChildPath, isCounterVariable, isDateVariable, isDocument, isFile, isFileMetadataVariable, isFolder, isImage, isPreviewable, isProviderRegistered, isText, isVideo, joinPath, loadConfig, loadConfigAsync, migrateToV2, migrateToV3, normalizePath, parseConfig, parseFileData, parseFileRefs, parsePatternString, patternToString, recalculateMergedData, registerModule, removeExtractionById, removeExtractionByIndex, removeRef, removeRefFromArray, removeRefsByCriteriaFromArray, sanitizeFilename, saveConfig, sortItems, stringifyFileData, stringifyFileRefs, successResult, toV2Record, updateExtractionById, validateExtractionData, validateFileDataStructure, validateNamingRuleSchema, validatePath, writeWithCollisionRetry };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
1
2
|
import { OAuth2Client } from 'google-auth-library';
|
|
2
3
|
|
|
3
4
|
/**
|
|
@@ -396,7 +397,7 @@ type FileRefVisibility = 'public' | 'private' | 'internal';
|
|
|
396
397
|
* A reference from an entity to a file.
|
|
397
398
|
* Multiple entities can reference the same file.
|
|
398
399
|
*/
|
|
399
|
-
interface FileRef {
|
|
400
|
+
interface FileRef$1 {
|
|
400
401
|
/** Unique ID for this reference */
|
|
401
402
|
ref_id: string;
|
|
402
403
|
/** Type of entity referencing the file (e.g., 'form_field', 'chat_message') */
|
|
@@ -480,7 +481,7 @@ interface FileWithStatus {
|
|
|
480
481
|
/** Full metadata record (V2) */
|
|
481
482
|
record: FileMetadataRecordV2;
|
|
482
483
|
/** Parsed file references */
|
|
483
|
-
refs: FileRef[];
|
|
484
|
+
refs: FileRef$1[];
|
|
484
485
|
/** Whether the file has zero references */
|
|
485
486
|
is_orphaned: boolean;
|
|
486
487
|
}
|
|
@@ -878,7 +879,7 @@ declare class FileMetadataService {
|
|
|
878
879
|
/**
|
|
879
880
|
* Get all references for a file
|
|
880
881
|
*/
|
|
881
|
-
getRefs(fileId: string): Promise<FileRef[] | null>;
|
|
882
|
+
getRefs(fileId: string): Promise<FileRef$1[] | null>;
|
|
882
883
|
/**
|
|
883
884
|
* Get a file with its status and parsed refs
|
|
884
885
|
*/
|
|
@@ -3256,24 +3257,24 @@ declare function generateRefId(): string;
|
|
|
3256
3257
|
* Parse a JSON string into FileRef array.
|
|
3257
3258
|
* Returns empty array on invalid input.
|
|
3258
3259
|
*/
|
|
3259
|
-
declare function parseFileRefs(json: string | null | undefined): FileRef[];
|
|
3260
|
+
declare function parseFileRefs(json: string | null | undefined): FileRef$1[];
|
|
3260
3261
|
/**
|
|
3261
3262
|
* Serialize FileRef array to JSON string
|
|
3262
3263
|
*/
|
|
3263
|
-
declare function stringifyFileRefs(refs: FileRef[]): string;
|
|
3264
|
+
declare function stringifyFileRefs(refs: FileRef$1[]): string;
|
|
3264
3265
|
/**
|
|
3265
3266
|
* Create a FileRef from AddRefOptions
|
|
3266
3267
|
*/
|
|
3267
|
-
declare function createFileRef(options: AddRefOptions): FileRef;
|
|
3268
|
+
declare function createFileRef(options: AddRefOptions): FileRef$1;
|
|
3268
3269
|
/**
|
|
3269
3270
|
* Remove a ref by ref_id (immutable)
|
|
3270
3271
|
*/
|
|
3271
|
-
declare function removeRefFromArray(refs: FileRef[], refId: string): FileRef[];
|
|
3272
|
+
declare function removeRefFromArray(refs: FileRef$1[], refId: string): FileRef$1[];
|
|
3272
3273
|
/**
|
|
3273
3274
|
* Remove refs matching criteria from array (immutable).
|
|
3274
3275
|
* All specified criteria fields must match (AND semantics).
|
|
3275
3276
|
*/
|
|
3276
|
-
declare function removeRefsByCriteriaFromArray(refs: FileRef[], criteria: Omit<RemoveRefsCriteria, 'file_id' | 'scope_id'>): FileRef[];
|
|
3277
|
+
declare function removeRefsByCriteriaFromArray(refs: FileRef$1[], criteria: Omit<RemoveRefsCriteria, 'file_id' | 'scope_id'>): FileRef$1[];
|
|
3277
3278
|
/**
|
|
3278
3279
|
* Safely cast a FileMetadataRecord to FileMetadataRecordV2.
|
|
3279
3280
|
* Missing V2 fields are defaulted.
|
|
@@ -3284,4 +3285,90 @@ declare function toV2Record(record: FileMetadataRecord): FileMetadataRecordV2;
|
|
|
3284
3285
|
*/
|
|
3285
3286
|
declare function buildFileWithStatus(record: FileMetadataRecord): FileWithStatus;
|
|
3286
3287
|
|
|
3287
|
-
|
|
3288
|
+
declare class NamingTemplate {
|
|
3289
|
+
static render(template: string, vars: Record<string, unknown>): string;
|
|
3290
|
+
}
|
|
3291
|
+
|
|
3292
|
+
type StoragePath = string;
|
|
3293
|
+
interface PutOpts {
|
|
3294
|
+
/** Reject if the target path already exists (atomic). */
|
|
3295
|
+
ifNotExists?: boolean;
|
|
3296
|
+
/** Hint for content-type; provider may sniff if absent. */
|
|
3297
|
+
contentType?: string;
|
|
3298
|
+
/** Free-form key/value metadata persisted with the file when supported. */
|
|
3299
|
+
metadata?: Record<string, string>;
|
|
3300
|
+
}
|
|
3301
|
+
interface PutResult {
|
|
3302
|
+
/** Provider tag — `"app_file_server"`, `"gdrive"`, `"in_memory"`. */
|
|
3303
|
+
provider: string;
|
|
3304
|
+
/** Provider-native identifier (path for app-server; file ID for GDrive). */
|
|
3305
|
+
native_id: string;
|
|
3306
|
+
/** Size in bytes of the persisted body. */
|
|
3307
|
+
size: number;
|
|
3308
|
+
}
|
|
3309
|
+
interface SignedUrlOpts {
|
|
3310
|
+
/** Seconds the URL is valid for. */
|
|
3311
|
+
ttl_seconds?: number;
|
|
3312
|
+
/** Suggested download filename (Content-Disposition). */
|
|
3313
|
+
filename_hint?: string;
|
|
3314
|
+
}
|
|
3315
|
+
interface ProbeResult {
|
|
3316
|
+
ok: boolean;
|
|
3317
|
+
/** Machine-readable error tag when ok=false. */
|
|
3318
|
+
error?: "drive_not_shared" | "write_denied" | "invalid_id" | "transient" | "config_missing";
|
|
3319
|
+
/** Free-form detail for logging. */
|
|
3320
|
+
message?: string;
|
|
3321
|
+
}
|
|
3322
|
+
/**
|
|
3323
|
+
* Storage provider abstraction. Every method MUST be idempotent at the
|
|
3324
|
+
* data-content level — re-invoking put with identical body is allowed.
|
|
3325
|
+
*
|
|
3326
|
+
* Paths are logical; providers translate to native identifiers internally.
|
|
3327
|
+
*/
|
|
3328
|
+
interface FileStorageProvider {
|
|
3329
|
+
put(path: StoragePath, body: Buffer | Readable, opts?: PutOpts): Promise<PutResult>;
|
|
3330
|
+
get(path: StoragePath): Promise<Buffer | Readable>;
|
|
3331
|
+
delete(path: StoragePath): Promise<void>;
|
|
3332
|
+
exists(path: StoragePath): Promise<boolean>;
|
|
3333
|
+
getSignedUrl(path: StoragePath, opts?: SignedUrlOpts): Promise<string>;
|
|
3334
|
+
/** Used by validation cron + onboarding step 2. */
|
|
3335
|
+
probe(): Promise<ProbeResult>;
|
|
3336
|
+
}
|
|
3337
|
+
|
|
3338
|
+
interface WriteWithCollisionOpts {
|
|
3339
|
+
provider: FileStorageProvider;
|
|
3340
|
+
template: string;
|
|
3341
|
+
vars: Record<string, unknown>;
|
|
3342
|
+
body: Buffer;
|
|
3343
|
+
/** Prefix to mount the rendered filename under, e.g. `"folders/abc123"`. */
|
|
3344
|
+
path_prefix?: string;
|
|
3345
|
+
/** Max attempts. Default 999. */
|
|
3346
|
+
max_attempts?: number;
|
|
3347
|
+
/** Extra opts passed to provider.put (ifNotExists is always forced true). */
|
|
3348
|
+
put_opts?: Omit<PutOpts, "ifNotExists">;
|
|
3349
|
+
}
|
|
3350
|
+
declare function writeWithCollisionRetry(opts: WriteWithCollisionOpts): Promise<PutResult & {
|
|
3351
|
+
logical_path: string;
|
|
3352
|
+
}>;
|
|
3353
|
+
|
|
3354
|
+
interface FileRef {
|
|
3355
|
+
ref_id: string;
|
|
3356
|
+
ref_type: string;
|
|
3357
|
+
ref_source: string;
|
|
3358
|
+
form_id?: string;
|
|
3359
|
+
field_id?: string;
|
|
3360
|
+
message_id?: string;
|
|
3361
|
+
document_id?: string;
|
|
3362
|
+
created_at: string;
|
|
3363
|
+
removed_at?: string;
|
|
3364
|
+
}
|
|
3365
|
+
interface FileWithRefs {
|
|
3366
|
+
file_refs: FileRef[];
|
|
3367
|
+
ref_count: number;
|
|
3368
|
+
}
|
|
3369
|
+
|
|
3370
|
+
declare function addRef<T extends FileWithRefs>(file: T, ref: FileRef): T;
|
|
3371
|
+
declare function removeRef<T extends FileWithRefs>(file: T, ref_id: string, removed_at: string): T;
|
|
3372
|
+
declare function countRefs(file: Pick<FileWithRefs, "file_refs">): number;
|
|
3373
|
+
|
|
3374
|
+
export { ALL_SYSTEM_VARIABLES, type AddExtractionOptions, type AddRefOptions, type AuthCallbacks, AuthenticationError, type CleanupOrphanedOptions, ConfigurationError, type ContentTagConfig, type CreateFolderOptions, type CrudServiceLike, DEFAULT_DATE_FORMATS, type DatabaseSchemaDefinition, type DatabaseTrackingConfig, DirectoryExistsError, DirectoryNotEmptyError, DirectoryNotFoundError, type DownloadOptions, DropboxAuth, type DropboxAuthCallbacks, type DropboxAuthConfig, type DropboxConfig, DropboxModule, type DropboxTokenData, type ExtractionData, type ExtractionOptions, type ExtractionResult, type FileBrowserState, type FileDataStructure, FileExistsError, type FileInfo, type FileItem, FileManager, type FileManagerOptions, type FileMetadataInput, type FileMetadataRecord, type FileMetadataRecordV2, FileMetadataService, type FileMetadataServiceOptions, type FileMetadataUpdate, FileNotFoundError, type FileRef$1 as FileRef, type FileRefVisibility, type FileStatus, type FileSystemItem, FileTooLargeError, type FileWithRefs, type FileWithStatus, type FindOrphanedOptions, type FolderItem, type GeneratedNameResult, type GoogleAuthConfig, GoogleDriveAuth, type GoogleDriveConfig, GoogleDriveModule, HAZO_FILES_DEFAULT_TABLE_NAME, HAZO_FILES_MIGRATION_V2, HAZO_FILES_MIGRATION_V3, HAZO_FILES_NAMING_DEFAULT_TABLE_NAME, HAZO_FILES_NAMING_TABLE_SCHEMA, HAZO_FILES_TABLE_SCHEMA, type HazoFilesColumnDefinitions, type HazoFilesConfig, HazoFilesError, type HazoFilesMigrationV2, type HazoFilesMigrationV3, type HazoFilesNamingColumnDefinitions, type HazoFilesNamingTableSchema, type HazoFilesTableSchema, type HazoLLMInstance, InvalidExtensionError, InvalidPathError, LLMExtractionService, type LLMFactory, type LLMFactoryConfig, type LLMProvider, type ListNamingConventionsOptions, type ListOptions, type LocalStorageConfig, LocalStorageModule, type MetadataLogger, type MigrationExecutor, type MigrationSchemaDefinition, type MoveOptions, type NameGenerationOptions, type 
NamingConventionInput, type NamingConventionRecord, NamingConventionService, type NamingConventionServiceOptions, type NamingConventionType, type NamingConventionUpdate, type NamingRuleConfiguratorProps, type NamingRuleHistoryEntry, type NamingRuleSchema, NamingTemplate, type NamingVariable, OperationError, type OperationResult, type ParsedNamingConvention, type PatternSegment, PermissionDeniedError, type ProgressCallback, type RemoveExtractionOptions, type RemoveRefsCriteria, type RenameOptions, SYSTEM_COUNTER_VARIABLES, SYSTEM_DATE_VARIABLES, SYSTEM_FILE_VARIABLES, type StorageModule, type StorageProvider, type TokenData, TrackedFileManager, type TrackedFileManagerFullOptions, type TrackedFileManagerOptions, type TrackedUploadOptions, type TreeNode, type UploadExtractOptions, type UploadExtractResult, UploadExtractService, type UploadOptions, type UploadWithRefOptions, type UseNamingRuleActions, type UseNamingRuleReturn, type UseNamingRuleState, type VariableCategory, type WriteWithCollisionOpts, addExtractionToFileData, addRef, backfillV2Defaults, buildFileWithStatus, clearExtractions, clonePattern, computeFileHash, computeFileHashFromStream, computeFileHashSync, computeFileInfo, countRefs, createAndInitializeModule, createDropboxAuth, createDropboxModule, createEmptyFileDataStructure, createEmptyNamingRuleSchema, createFileItem, createFileManager, createFileMetadataService, createFileRef, createFolderItem, createGoogleDriveAuth, createGoogleDriveModule, createInitializedFileManager, createInitializedTrackedFileManager, createLLMExtractionService, createLiteralSegment, createLocalModule, createModule, createNamingConventionService, createTrackedFileManager, createUploadExtractService, createVariableSegment, deepMerge, errorResult, filterItems, formatBytes, formatCounter, formatDateToken, generateExtractionId, generateId, generatePreviewName, generateRefId, generateSampleConfig, generateSegmentId, getBaseName, getBreadcrumbs, getDirName, getExtension, 
getExtensionFromMime, getExtractionById, getExtractionCount, getExtractions, getFileCategory, getFileMetadataValues, getMergedData, getMigrationForTable, getMigrationV3ForTable, getMimeType, getNameWithoutExtension, getNamingSchemaForTable, getParentPath, getPathSegments, getRegisteredProviders, getRelativePath, getSchemaForTable, getSystemVariablePreviewValues, hasExtension, hasExtractionStructure, hasFileContentChanged, hashesEqual, hazo_files_generate_file_name, hazo_files_generate_folder_name, isAudio, isChildPath, isCounterVariable, isDateVariable, isDocument, isFile, isFileMetadataVariable, isFolder, isImage, isPreviewable, isProviderRegistered, isText, isVideo, joinPath, loadConfig, loadConfigAsync, migrateToV2, migrateToV3, normalizePath, parseConfig, parseFileData, parseFileRefs, parsePatternString, patternToString, recalculateMergedData, registerModule, removeExtractionById, removeExtractionByIndex, removeRef, removeRefFromArray, removeRefsByCriteriaFromArray, sanitizeFilename, saveConfig, sortItems, stringifyFileData, stringifyFileRefs, successResult, toV2Record, updateExtractionById, validateExtractionData, validateFileDataStructure, validateNamingRuleSchema, validatePath, writeWithCollisionRetry };
|
package/dist/index.js
CHANGED
|
@@ -58,6 +58,7 @@ __export(index_exports, {
|
|
|
58
58
|
LLMExtractionService: () => LLMExtractionService,
|
|
59
59
|
LocalStorageModule: () => LocalStorageModule,
|
|
60
60
|
NamingConventionService: () => NamingConventionService,
|
|
61
|
+
NamingTemplate: () => NamingTemplate,
|
|
61
62
|
OperationError: () => OperationError,
|
|
62
63
|
PermissionDeniedError: () => PermissionDeniedError,
|
|
63
64
|
SYSTEM_COUNTER_VARIABLES: () => SYSTEM_COUNTER_VARIABLES,
|
|
@@ -66,6 +67,7 @@ __export(index_exports, {
|
|
|
66
67
|
TrackedFileManager: () => TrackedFileManager,
|
|
67
68
|
UploadExtractService: () => UploadExtractService,
|
|
68
69
|
addExtractionToFileData: () => addExtractionToFileData,
|
|
70
|
+
addRef: () => addRef,
|
|
69
71
|
backfillV2Defaults: () => backfillV2Defaults,
|
|
70
72
|
buildFileWithStatus: () => buildFileWithStatus,
|
|
71
73
|
clearExtractions: () => clearExtractions,
|
|
@@ -74,6 +76,7 @@ __export(index_exports, {
|
|
|
74
76
|
computeFileHashFromStream: () => computeFileHashFromStream,
|
|
75
77
|
computeFileHashSync: () => computeFileHashSync,
|
|
76
78
|
computeFileInfo: () => computeFileInfo,
|
|
79
|
+
countRefs: () => countRefs,
|
|
77
80
|
createAndInitializeModule: () => createAndInitializeModule,
|
|
78
81
|
createDropboxAuth: () => createDropboxAuth,
|
|
79
82
|
createDropboxModule: () => createDropboxModule,
|
|
@@ -164,6 +167,7 @@ __export(index_exports, {
|
|
|
164
167
|
registerModule: () => registerModule,
|
|
165
168
|
removeExtractionById: () => removeExtractionById,
|
|
166
169
|
removeExtractionByIndex: () => removeExtractionByIndex,
|
|
170
|
+
removeRef: () => removeRef,
|
|
167
171
|
removeRefFromArray: () => removeRefFromArray,
|
|
168
172
|
removeRefsByCriteriaFromArray: () => removeRefsByCriteriaFromArray,
|
|
169
173
|
sanitizeFilename: () => sanitizeFilename,
|
|
@@ -177,7 +181,8 @@ __export(index_exports, {
|
|
|
177
181
|
validateExtractionData: () => validateExtractionData,
|
|
178
182
|
validateFileDataStructure: () => validateFileDataStructure,
|
|
179
183
|
validateNamingRuleSchema: () => validateNamingRuleSchema,
|
|
180
|
-
validatePath: () => validatePath
|
|
184
|
+
validatePath: () => validatePath,
|
|
185
|
+
writeWithCollisionRetry: () => writeWithCollisionRetry
|
|
181
186
|
});
|
|
182
187
|
module.exports = __toCommonJS(index_exports);
|
|
183
188
|
|
|
@@ -6013,6 +6018,107 @@ async function migrateToV3(executor, dbType, tableName) {
|
|
|
6013
6018
|
await executor.run(idx);
|
|
6014
6019
|
}
|
|
6015
6020
|
}
|
|
6021
|
+
|
|
6022
|
+
// src/naming/template.ts
|
|
6023
|
+
// Placeholder grammar: `{name}` or `{dotted.path}`, optionally followed by one
// or more `|formatter` / `|formatter:arg` pipes.
var VAR_RE = /\{([a-zA-Z_][\w.]*)((?:\|[a-zA-Z_]+(?::[^}|]+)?)*)\}/g;
// Characters replaced with "_" in the final name.
var BAD_CHARS = /[/\\:*?"<>|]/g;
// Upper bound on the length of a rendered name.
var MAX_LEN = 200;
var NamingTemplate = class {
  /**
   * Expand every placeholder in `template` using `vars`.
   *
   * Each placeholder is resolved as a dotted path, converted to text, and then
   * passed through its formatters from left to right. The finished string has
   * every BAD_CHARS character replaced by "_" and is cut to MAX_LEN characters.
   *
   * @param {string} template - Template text containing `{...}` placeholders.
   * @param {object} vars - Values the placeholders are resolved against.
   * @returns {string} The rendered, sanitised and length-capped name.
   * @throws {Error} When a placeholder resolves to `undefined`, or a formatter rejects its input.
   */
  static render(template, vars) {
    const expand = (_match, varName, pipeChain) => {
      const resolved = resolveDottedPath(vars, varName);
      if (resolved === void 0) {
        throw new Error(`missing variable: ${varName}`);
      }
      const initialText = stringify2(resolved);
      const specs = pipeChain ? pipeChain.split("|").filter(Boolean) : [];
      return specs.reduce((text, spec) => {
        const [formatterName, ...argParts] = spec.split(":");
        // Re-join so arguments that themselves contain ":" survive intact.
        return applyFormatter(formatterName, argParts.join(":"), text);
      }, initialText);
    };
    const substituted = template.replace(VAR_RE, expand);
    const sanitised = substituted.replace(BAD_CHARS, "_");
    return sanitised.slice(0, MAX_LEN);
  }
};
|
|
6042
|
+
/**
 * Resolve a dotted path such as `"user.address.city"` against `vars`.
 *
 * Only own properties are followed. Names that exist solely on a prototype
 * (`constructor`, `toString`, `__proto__`, ...) therefore resolve to
 * `undefined` instead of leaking an inherited function into the output.
 *
 * @param {object} vars - Root object the path is evaluated against.
 * @param {string} path4 - Dot-separated list of property names.
 * @returns {*} The value at the path, or `undefined` when a segment is absent
 *   or an intermediate value is `null`/`undefined`.
 */
function resolveDottedPath(vars, path4) {
  const hasOwn = Object.prototype.hasOwnProperty;
  return path4.split(".").reduce((acc, key) => {
    if (acc == null) return void 0;
    // Object(acc) lets primitives be inspected too (e.g. a string's `length`).
    return hasOwn.call(Object(acc), key) ? acc[key] : void 0;
  }, vars);
}
|
|
6045
|
+
/**
 * Convert a resolved template value into the text formatters operate on.
 *
 * `Date` instances become their ISO-8601 string, `null`/`undefined` become the
 * empty string, and every other value goes through `String()`.
 *
 * @param {*} v - Value to convert.
 * @returns {string} Text form of `v`.
 */
function stringify2(v) {
  if (v instanceof Date) {
    return v.toISOString();
  }
  return v == null ? "" : String(v);
}
|
|
6050
|
+
/**
 * Parse the numeric argument of the `pad:N` / `truncate:N` formatters.
 *
 * @param {string} formatter - Formatter name, used in the error message.
 * @param {string} arg - Raw argument text.
 * @returns {number} The parsed number.
 * @throws {Error} When the argument is missing, blank, or not a number.
 */
function parseNumericFormatterArg(formatter, arg) {
  const n = Number(arg);
  // Number("  ") is 0, so a blank argument has to be rejected explicitly.
  if (String(arg ?? "").trim() === "" || Number.isNaN(n)) {
    throw new Error(`${formatter} formatter requires a numeric arg, got: "${arg}"`);
  }
  return n;
}

/**
 * Apply one named formatter to an already-stringified value.
 *
 * Supported formatters:
 * - `slug`: lower-case, runs of non `[a-z0-9]` become "-", edge dashes trimmed.
 * - `pad:N`: left-pad with "0" to length N.
 * - `upper` / `lower`: change case.
 * - `truncate:N`: keep the first N characters.
 * - `date:FMT`: parse the value as a date and substitute every `YYYY`, `MM`,
 *   `DD`, `HH`, `mm`, `ss` token in FMT with its UTC component.
 *
 * @param {string} name - Formatter name.
 * @param {string} arg - Text after the first ":" of the formatter ("" if none).
 * @param {string} value - Text to transform.
 * @returns {string} The transformed text.
 * @throws {Error} On an unknown formatter, a missing/blank/non-numeric argument
 *   for `pad`/`truncate`, or an empty/unparseable value for `date`.
 */
function applyFormatter(name, arg, value) {
  switch (name) {
    case "slug":
      return value.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "");
    case "pad":
      return value.padStart(parseNumericFormatterArg("pad", arg), "0");
    case "upper":
      return value.toUpperCase();
    case "lower":
      return value.toLowerCase();
    case "truncate":
      return value.slice(0, parseNumericFormatterArg("truncate", arg));
    case "date": {
      if (!value) throw new Error(`date formatter received empty value`);
      const d = new Date(value);
      if (Number.isNaN(d.getTime())) throw new Error(`date formatter received invalid date: "${value}"`);
      const pad2 = (n) => String(n).padStart(2, "0");
      const parts = {
        YYYY: String(d.getUTCFullYear()),
        MM: pad2(d.getUTCMonth() + 1),
        DD: pad2(d.getUTCDate()),
        HH: pad2(d.getUTCHours()),
        mm: pad2(d.getUTCMinutes()),
        ss: pad2(d.getUTCSeconds())
      };
      // One global pass so a token that appears more than once is always replaced.
      return arg.replace(/YYYY|MM|DD|HH|mm|ss/g, (token) => parts[token]);
    }
    default:
      throw new Error(`Unknown formatter: ${name}`);
  }
}
|
|
6079
|
+
|
|
6080
|
+
// src/providers/types.ts
|
|
6081
|
+
/**
 * Error thrown by writeWithCollisionRetry when no attempt produced a free path.
 *
 * Carries `attempts` (first constructor argument) and `lastPath` (second
 * constructor argument) so callers can report or act on them.
 */
var StorageCollisionExhausted = class extends Error {
  constructor(attempts, lastPath) {
    const detail = `Storage collision could not be resolved after ${attempts} attempts at "${lastPath}"`;
    super(detail);
    Object.assign(this, { attempts, lastPath, name: "StorageCollisionExhausted" });
  }
};
|
|
6089
|
+
|
|
6090
|
+
// src/naming/collision.ts
|
|
6091
|
+
/**
 * Write `opts.body` under a name rendered from `opts.template`, retrying with
 * the next `index` whenever the target already exists.
 *
 * Every attempt renders the template with `{ ...opts.vars, index: i }`
 * (`i` starts at 1), prefixes it with `opts.path_prefix` when given, and calls
 * `opts.provider.put` with `ifNotExists: true`.
 *
 * @param {object} opts - See `WriteWithCollisionOpts`.
 * @returns {Promise<object>} The provider's put result plus `logical_path`,
 *   the path that was written.
 * @throws {StorageCollisionExhausted} When `max_attempts` (default 999)
 *   attempts all collided, or as soon as the rendered path stops changing
 *   between attempts.
 * @throws Any provider error whose message does not match `/exists/i`.
 */
async function writeWithCollisionRetry(opts) {
  const max = opts.max_attempts ?? 999;
  let lastPath = "";
  for (let i = 1; i <= max; i++) {
    const rendered = NamingTemplate.render(opts.template, { ...opts.vars, index: i });
    const full = opts.path_prefix ? `${opts.path_prefix}/${rendered}` : rendered;
    // Getting here with i > 1 means `lastPath` is already taken. If the name did
    // not change (no `{index}` in the template, or it was cut off by the length
    // cap), further attempts would only repeat the same failing write.
    if (i > 1 && full === lastPath) {
      throw new StorageCollisionExhausted(i - 1, lastPath);
    }
    lastPath = full;
    try {
      const res = await opts.provider.put(full, opts.body, { ...opts.put_opts, ifNotExists: true });
      return { ...res, logical_path: full };
    } catch (err) {
      // Collisions are recognised by message; anything else is a real failure.
      const msg = err instanceof Error ? err.message : String(err);
      if (!/exists/i.test(msg)) throw err;
    }
  }
  throw new StorageCollisionExhausted(max, lastPath);
}
|
|
6108
|
+
|
|
6109
|
+
// src/reference-tracking/index.ts
|
|
6110
|
+
/**
 * Attach `ref` to `file` without mutating the input.
 *
 * When `file.file_refs` already holds an entry with the same `ref_id` and no
 * `removed_at`, `file` is returned unchanged. Otherwise a shallow copy is
 * returned with `ref` appended and `ref_count` recomputed via `countRefs`.
 *
 * @param {object} file - Record with a `file_refs` array.
 * @param {object} ref - Reference to add.
 * @returns {object} `file` itself, or an updated copy.
 */
function addRef(file, ref) {
  const alreadyActive = file.file_refs.some(
    (existing) => existing.ref_id === ref.ref_id && !existing.removed_at
  );
  if (alreadyActive) {
    return file;
  }
  const file_refs = [...file.file_refs, ref];
  return { ...file, file_refs, ref_count: countRefs({ file_refs }) };
}
|
|
6115
|
+
/**
 * Tombstone the active reference(s) identified by `ref_id`.
 *
 * Entries whose `ref_id` matches and that have no `removed_at` are replaced by
 * copies carrying `removed_at`; all other entries are kept as they are. A
 * shallow copy of `file` is returned with `ref_count` recomputed via `countRefs`.
 *
 * @param {object} file - Record with a `file_refs` array.
 * @param {string} ref_id - Id of the reference to mark as removed.
 * @param {string} removed_at - Value stored in the entry's `removed_at`.
 * @returns {object} Updated copy of `file`.
 */
function removeRef(file, ref_id, removed_at) {
  const file_refs = file.file_refs.map((entry) => {
    const isActiveMatch = entry.ref_id === ref_id && !entry.removed_at;
    return isActiveMatch ? { ...entry, removed_at } : entry;
  });
  return { ...file, file_refs, ref_count: countRefs({ file_refs }) };
}
|
|
6119
|
+
/**
 * Count the references in `file.file_refs` that have no truthy `removed_at`.
 *
 * @param {object} file - Object with a `file_refs` array.
 * @returns {number} Number of active references.
 */
function countRefs(file) {
  return file.file_refs.reduce((active, ref) => (ref.removed_at ? active : active + 1), 0);
}
|
|
6016
6122
|
// Annotate the CommonJS export names for ESM import in node:
|
|
6017
6123
|
0 && (module.exports = {
|
|
6018
6124
|
ALL_SYSTEM_VARIABLES,
|
|
@@ -6043,6 +6149,7 @@ async function migrateToV3(executor, dbType, tableName) {
|
|
|
6043
6149
|
LLMExtractionService,
|
|
6044
6150
|
LocalStorageModule,
|
|
6045
6151
|
NamingConventionService,
|
|
6152
|
+
NamingTemplate,
|
|
6046
6153
|
OperationError,
|
|
6047
6154
|
PermissionDeniedError,
|
|
6048
6155
|
SYSTEM_COUNTER_VARIABLES,
|
|
@@ -6051,6 +6158,7 @@ async function migrateToV3(executor, dbType, tableName) {
|
|
|
6051
6158
|
TrackedFileManager,
|
|
6052
6159
|
UploadExtractService,
|
|
6053
6160
|
addExtractionToFileData,
|
|
6161
|
+
addRef,
|
|
6054
6162
|
backfillV2Defaults,
|
|
6055
6163
|
buildFileWithStatus,
|
|
6056
6164
|
clearExtractions,
|
|
@@ -6059,6 +6167,7 @@ async function migrateToV3(executor, dbType, tableName) {
|
|
|
6059
6167
|
computeFileHashFromStream,
|
|
6060
6168
|
computeFileHashSync,
|
|
6061
6169
|
computeFileInfo,
|
|
6170
|
+
countRefs,
|
|
6062
6171
|
createAndInitializeModule,
|
|
6063
6172
|
createDropboxAuth,
|
|
6064
6173
|
createDropboxModule,
|
|
@@ -6149,6 +6258,7 @@ async function migrateToV3(executor, dbType, tableName) {
|
|
|
6149
6258
|
registerModule,
|
|
6150
6259
|
removeExtractionById,
|
|
6151
6260
|
removeExtractionByIndex,
|
|
6261
|
+
removeRef,
|
|
6152
6262
|
removeRefFromArray,
|
|
6153
6263
|
removeRefsByCriteriaFromArray,
|
|
6154
6264
|
sanitizeFilename,
|
|
@@ -6162,5 +6272,6 @@ async function migrateToV3(executor, dbType, tableName) {
|
|
|
6162
6272
|
validateExtractionData,
|
|
6163
6273
|
validateFileDataStructure,
|
|
6164
6274
|
validateNamingRuleSchema,
|
|
6165
|
-
validatePath
|
|
6275
|
+
validatePath,
|
|
6276
|
+
writeWithCollisionRetry
|
|
6166
6277
|
});
|
package/dist/index.mjs
CHANGED
|
@@ -5830,6 +5830,107 @@ async function migrateToV3(executor, dbType, tableName) {
|
|
|
5830
5830
|
await executor.run(idx);
|
|
5831
5831
|
}
|
|
5832
5832
|
}
|
|
5833
|
+
|
|
5834
|
+
// src/naming/template.ts
|
|
5835
|
+
// Placeholder grammar: `{name}` or `{dotted.path}`, optionally followed by one
// or more `|formatter` / `|formatter:arg` pipes.
var VAR_RE = /\{([a-zA-Z_][\w.]*)((?:\|[a-zA-Z_]+(?::[^}|]+)?)*)\}/g;
// Characters replaced with "_" in the final name.
var BAD_CHARS = /[/\\:*?"<>|]/g;
// Upper bound on the length of a rendered name.
var MAX_LEN = 200;
var NamingTemplate = class {
  /**
   * Expand every placeholder in `template` using `vars`.
   *
   * Each placeholder is resolved as a dotted path, converted to text, and then
   * passed through its formatters from left to right. The finished string has
   * every BAD_CHARS character replaced by "_" and is cut to MAX_LEN characters.
   *
   * @param {string} template - Template text containing `{...}` placeholders.
   * @param {object} vars - Values the placeholders are resolved against.
   * @returns {string} The rendered, sanitised and length-capped name.
   * @throws {Error} When a placeholder resolves to `undefined`, or a formatter rejects its input.
   */
  static render(template, vars) {
    const expand = (_match, varName, pipeChain) => {
      const resolved = resolveDottedPath(vars, varName);
      if (resolved === void 0) {
        throw new Error(`missing variable: ${varName}`);
      }
      const initialText = stringify2(resolved);
      const specs = pipeChain ? pipeChain.split("|").filter(Boolean) : [];
      return specs.reduce((text, spec) => {
        const [formatterName, ...argParts] = spec.split(":");
        // Re-join so arguments that themselves contain ":" survive intact.
        return applyFormatter(formatterName, argParts.join(":"), text);
      }, initialText);
    };
    const substituted = template.replace(VAR_RE, expand);
    const sanitised = substituted.replace(BAD_CHARS, "_");
    return sanitised.slice(0, MAX_LEN);
  }
};
|
|
5854
|
+
/**
 * Resolve a dotted path such as `"user.address.city"` against `vars`.
 *
 * Only own properties are followed. Names that exist solely on a prototype
 * (`constructor`, `toString`, `__proto__`, ...) therefore resolve to
 * `undefined` instead of leaking an inherited function into the output.
 *
 * @param {object} vars - Root object the path is evaluated against.
 * @param {string} path4 - Dot-separated list of property names.
 * @returns {*} The value at the path, or `undefined` when a segment is absent
 *   or an intermediate value is `null`/`undefined`.
 */
function resolveDottedPath(vars, path4) {
  const hasOwn = Object.prototype.hasOwnProperty;
  return path4.split(".").reduce((acc, key) => {
    if (acc == null) return void 0;
    // Object(acc) lets primitives be inspected too (e.g. a string's `length`).
    return hasOwn.call(Object(acc), key) ? acc[key] : void 0;
  }, vars);
}
|
|
5857
|
+
/**
 * Convert a resolved template value into the text formatters operate on.
 *
 * `Date` instances become their ISO-8601 string, `null`/`undefined` become the
 * empty string, and every other value goes through `String()`.
 *
 * @param {*} v - Value to convert.
 * @returns {string} Text form of `v`.
 */
function stringify2(v) {
  if (v instanceof Date) {
    return v.toISOString();
  }
  return v == null ? "" : String(v);
}
|
|
5862
|
+
/**
 * Parse the numeric argument of the `pad:N` / `truncate:N` formatters.
 *
 * @param {string} formatter - Formatter name, used in the error message.
 * @param {string} arg - Raw argument text.
 * @returns {number} The parsed number.
 * @throws {Error} When the argument is missing, blank, or not a number.
 */
function parseNumericFormatterArg(formatter, arg) {
  const n = Number(arg);
  // Number("  ") is 0, so a blank argument has to be rejected explicitly.
  if (String(arg ?? "").trim() === "" || Number.isNaN(n)) {
    throw new Error(`${formatter} formatter requires a numeric arg, got: "${arg}"`);
  }
  return n;
}

/**
 * Apply one named formatter to an already-stringified value.
 *
 * Supported formatters:
 * - `slug`: lower-case, runs of non `[a-z0-9]` become "-", edge dashes trimmed.
 * - `pad:N`: left-pad with "0" to length N.
 * - `upper` / `lower`: change case.
 * - `truncate:N`: keep the first N characters.
 * - `date:FMT`: parse the value as a date and substitute every `YYYY`, `MM`,
 *   `DD`, `HH`, `mm`, `ss` token in FMT with its UTC component.
 *
 * @param {string} name - Formatter name.
 * @param {string} arg - Text after the first ":" of the formatter ("" if none).
 * @param {string} value - Text to transform.
 * @returns {string} The transformed text.
 * @throws {Error} On an unknown formatter, a missing/blank/non-numeric argument
 *   for `pad`/`truncate`, or an empty/unparseable value for `date`.
 */
function applyFormatter(name, arg, value) {
  switch (name) {
    case "slug":
      return value.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "");
    case "pad":
      return value.padStart(parseNumericFormatterArg("pad", arg), "0");
    case "upper":
      return value.toUpperCase();
    case "lower":
      return value.toLowerCase();
    case "truncate":
      return value.slice(0, parseNumericFormatterArg("truncate", arg));
    case "date": {
      if (!value) throw new Error(`date formatter received empty value`);
      const d = new Date(value);
      if (Number.isNaN(d.getTime())) throw new Error(`date formatter received invalid date: "${value}"`);
      const pad2 = (n) => String(n).padStart(2, "0");
      const parts = {
        YYYY: String(d.getUTCFullYear()),
        MM: pad2(d.getUTCMonth() + 1),
        DD: pad2(d.getUTCDate()),
        HH: pad2(d.getUTCHours()),
        mm: pad2(d.getUTCMinutes()),
        ss: pad2(d.getUTCSeconds())
      };
      // One global pass so a token that appears more than once is always replaced.
      return arg.replace(/YYYY|MM|DD|HH|mm|ss/g, (token) => parts[token]);
    }
    default:
      throw new Error(`Unknown formatter: ${name}`);
  }
}
|
|
5891
|
+
|
|
5892
|
+
// src/providers/types.ts
|
|
5893
|
+
/**
 * Error thrown by writeWithCollisionRetry when no attempt produced a free path.
 *
 * Carries `attempts` (first constructor argument) and `lastPath` (second
 * constructor argument) so callers can report or act on them.
 */
var StorageCollisionExhausted = class extends Error {
  constructor(attempts, lastPath) {
    const detail = `Storage collision could not be resolved after ${attempts} attempts at "${lastPath}"`;
    super(detail);
    Object.assign(this, { attempts, lastPath, name: "StorageCollisionExhausted" });
  }
};
|
|
5901
|
+
|
|
5902
|
+
// src/naming/collision.ts
|
|
5903
|
+
/**
 * Write `opts.body` under a name rendered from `opts.template`, retrying with
 * the next `index` whenever the target already exists.
 *
 * Every attempt renders the template with `{ ...opts.vars, index: i }`
 * (`i` starts at 1), prefixes it with `opts.path_prefix` when given, and calls
 * `opts.provider.put` with `ifNotExists: true`.
 *
 * @param {object} opts - See `WriteWithCollisionOpts`.
 * @returns {Promise<object>} The provider's put result plus `logical_path`,
 *   the path that was written.
 * @throws {StorageCollisionExhausted} When `max_attempts` (default 999)
 *   attempts all collided, or as soon as the rendered path stops changing
 *   between attempts.
 * @throws Any provider error whose message does not match `/exists/i`.
 */
async function writeWithCollisionRetry(opts) {
  const max = opts.max_attempts ?? 999;
  let lastPath = "";
  for (let i = 1; i <= max; i++) {
    const rendered = NamingTemplate.render(opts.template, { ...opts.vars, index: i });
    const full = opts.path_prefix ? `${opts.path_prefix}/${rendered}` : rendered;
    // Getting here with i > 1 means `lastPath` is already taken. If the name did
    // not change (no `{index}` in the template, or it was cut off by the length
    // cap), further attempts would only repeat the same failing write.
    if (i > 1 && full === lastPath) {
      throw new StorageCollisionExhausted(i - 1, lastPath);
    }
    lastPath = full;
    try {
      const res = await opts.provider.put(full, opts.body, { ...opts.put_opts, ifNotExists: true });
      return { ...res, logical_path: full };
    } catch (err) {
      // Collisions are recognised by message; anything else is a real failure.
      const msg = err instanceof Error ? err.message : String(err);
      if (!/exists/i.test(msg)) throw err;
    }
  }
  throw new StorageCollisionExhausted(max, lastPath);
}
|
|
5920
|
+
|
|
5921
|
+
// src/reference-tracking/index.ts
|
|
5922
|
+
/**
 * Attach `ref` to `file` without mutating the input.
 *
 * When `file.file_refs` already holds an entry with the same `ref_id` and no
 * `removed_at`, `file` is returned unchanged. Otherwise a shallow copy is
 * returned with `ref` appended and `ref_count` recomputed via `countRefs`.
 *
 * @param {object} file - Record with a `file_refs` array.
 * @param {object} ref - Reference to add.
 * @returns {object} `file` itself, or an updated copy.
 */
function addRef(file, ref) {
  const alreadyActive = file.file_refs.some(
    (existing) => existing.ref_id === ref.ref_id && !existing.removed_at
  );
  if (alreadyActive) {
    return file;
  }
  const file_refs = [...file.file_refs, ref];
  return { ...file, file_refs, ref_count: countRefs({ file_refs }) };
}
|
|
5927
|
+
/**
 * Tombstone the active reference(s) identified by `ref_id`.
 *
 * Entries whose `ref_id` matches and that have no `removed_at` are replaced by
 * copies carrying `removed_at`; all other entries are kept as they are. A
 * shallow copy of `file` is returned with `ref_count` recomputed via `countRefs`.
 *
 * @param {object} file - Record with a `file_refs` array.
 * @param {string} ref_id - Id of the reference to mark as removed.
 * @param {string} removed_at - Value stored in the entry's `removed_at`.
 * @returns {object} Updated copy of `file`.
 */
function removeRef(file, ref_id, removed_at) {
  const file_refs = file.file_refs.map((entry) => {
    const isActiveMatch = entry.ref_id === ref_id && !entry.removed_at;
    return isActiveMatch ? { ...entry, removed_at } : entry;
  });
  return { ...file, file_refs, ref_count: countRefs({ file_refs }) };
}
|
|
5931
|
+
/**
 * Count the references in `file.file_refs` that have no truthy `removed_at`.
 *
 * @param {object} file - Object with a `file_refs` array.
 * @returns {number} Number of active references.
 */
function countRefs(file) {
  return file.file_refs.reduce((active, ref) => (ref.removed_at ? active : active + 1), 0);
}
|
|
5833
5934
|
export {
|
|
5834
5935
|
ALL_SYSTEM_VARIABLES,
|
|
5835
5936
|
AuthenticationError,
|
|
@@ -5859,6 +5960,7 @@ export {
|
|
|
5859
5960
|
LLMExtractionService,
|
|
5860
5961
|
LocalStorageModule,
|
|
5861
5962
|
NamingConventionService,
|
|
5963
|
+
NamingTemplate,
|
|
5862
5964
|
OperationError,
|
|
5863
5965
|
PermissionDeniedError,
|
|
5864
5966
|
SYSTEM_COUNTER_VARIABLES,
|
|
@@ -5867,6 +5969,7 @@ export {
|
|
|
5867
5969
|
TrackedFileManager,
|
|
5868
5970
|
UploadExtractService,
|
|
5869
5971
|
addExtractionToFileData,
|
|
5972
|
+
addRef,
|
|
5870
5973
|
backfillV2Defaults,
|
|
5871
5974
|
buildFileWithStatus,
|
|
5872
5975
|
clearExtractions,
|
|
@@ -5875,6 +5978,7 @@ export {
|
|
|
5875
5978
|
computeFileHashFromStream,
|
|
5876
5979
|
computeFileHashSync,
|
|
5877
5980
|
computeFileInfo,
|
|
5981
|
+
countRefs,
|
|
5878
5982
|
createAndInitializeModule,
|
|
5879
5983
|
createDropboxAuth,
|
|
5880
5984
|
createDropboxModule,
|
|
@@ -5965,6 +6069,7 @@ export {
|
|
|
5965
6069
|
registerModule,
|
|
5966
6070
|
removeExtractionById,
|
|
5967
6071
|
removeExtractionByIndex,
|
|
6072
|
+
removeRef,
|
|
5968
6073
|
removeRefFromArray,
|
|
5969
6074
|
removeRefsByCriteriaFromArray,
|
|
5970
6075
|
sanitizeFilename,
|
|
@@ -5978,5 +6083,6 @@ export {
|
|
|
5978
6083
|
validateExtractionData,
|
|
5979
6084
|
validateFileDataStructure,
|
|
5980
6085
|
validateNamingRuleSchema,
|
|
5981
|
-
validatePath
|
|
6086
|
+
validatePath,
|
|
6087
|
+
writeWithCollisionRetry
|
|
5982
6088
|
};
|