@redaksjon/protokoll 1.0.1 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,7 @@ import 'node:crypto';
14
14
  import 'node:fs/promises';
15
15
  import 'html-to-text';
16
16
  import 'commander';
17
- import '@theunwalked/overcontext';
17
+ import '@utilarium/overcontext';
18
18
  import '@redaksjon/context';
19
19
  import 'js-yaml';
20
20
  import 'winston';
@@ -14,7 +14,7 @@ import 'node:crypto';
14
14
  import 'node:fs/promises';
15
15
  import 'html-to-text';
16
16
  import 'commander';
17
- import '@theunwalked/overcontext';
17
+ import '@utilarium/overcontext';
18
18
  import '@redaksjon/context';
19
19
  import 'js-yaml';
20
20
  import 'winston';
@@ -16,12 +16,12 @@ import crypto from 'node:crypto';
16
16
  import * as fs$1 from 'node:fs/promises';
17
17
  import { htmlToText } from 'html-to-text';
18
18
  import { Command } from 'commander';
19
- import { discoverOvercontext, discoverContextRoot } from '@theunwalked/overcontext';
19
+ import { discoverOvercontext, discoverContextRoot } from '@utilarium/overcontext';
20
20
  import { redaksjonPluralNames, redaksjonSchemas } from '@redaksjon/context';
21
21
  import * as yaml from 'js-yaml';
22
22
  import winston from 'winston';
23
23
 
24
- const VERSION = "1.0.1 (HEAD/0bd151a T:v1.0.1 2026-01-28 08:20:31 -0800) linux x64 v24.13.0";
24
+ const VERSION = "1.0.7 (HEAD/2b22e4f T:v1.0.7 2026-01-30 15:54:52 -0800) linux x64 v24.13.0";
25
25
  const PROGRAM_NAME = "protokoll";
26
26
  const DEFAULT_DIFF = true;
27
27
  const DEFAULT_LOG = false;
@@ -220,6 +220,9 @@ const create$w = (params) => {
220
220
  const deleteFile = async (path2) => {
221
221
  await fs.promises.unlink(path2);
222
222
  };
223
+ const deleteDirectory = async (path2) => {
224
+ await fs.promises.rm(path2, { recursive: true, force: true });
225
+ };
223
226
  const getFileSize = async (path2) => {
224
227
  const stats = await fs.promises.stat(path2);
225
228
  return stats.size;
@@ -241,15 +244,11 @@ const create$w = (params) => {
241
244
  hashFile,
242
245
  listFiles,
243
246
  deleteFile,
247
+ deleteDirectory,
244
248
  getFileSize
245
249
  };
246
250
  };
247
251
 
248
- const protokollDiscoveryOptions = {
249
- contextDirName: ".protokoll/context",
250
- maxLevels: 10
251
- };
252
-
253
252
  const TYPE_TO_DIRECTORY = {
254
253
  person: "people",
255
254
  project: "projects",
@@ -279,13 +278,16 @@ const create$v = () => {
279
278
  }
280
279
  try {
281
280
  const lastContextDir = contextDirs[contextDirs.length - 1];
282
- const protokollDir = lastContextDir.replace(/\/context$/, "");
283
- const startDir = protokollDir.replace(/\/\.protokoll$/, "");
281
+ const startDir = path.dirname(lastContextDir);
284
282
  api = await discoverOvercontext({
285
283
  schemas: redaksjonSchemas,
286
284
  pluralNames: redaksjonPluralNames,
285
+ // Use standard names without context/ prefix
287
286
  startDir,
288
- ...protokollDiscoveryOptions
287
+ contextDirName: path.basename(lastContextDir),
288
+ // Use actual context dir name
289
+ maxLevels: 1
290
+ // Limit discovery to prevent finding unrelated parent contexts
289
291
  });
290
292
  for (const type of ["person", "project", "company", "term", "ignored"]) {
291
293
  const entities = await api.getAll(type);
@@ -301,12 +303,12 @@ const create$v = () => {
301
303
  }
302
304
  }
303
305
  },
304
- async save(entity, _targetDir) {
306
+ async save(entity, _targetDir, allowUpdate = false) {
307
+ const existing = cache.get(entity.type)?.get(entity.id);
308
+ if (existing && !allowUpdate) {
309
+ throw new Error(`Entity with id "${entity.id}" already exists`);
310
+ }
305
311
  if (!api) {
306
- const existing = cache.get(entity.type)?.get(entity.id);
307
- if (existing) {
308
- throw new Error(`Entity with id "${entity.id}" already exists`);
309
- }
310
312
  cache.get(entity.type)?.set(entity.id, entity);
311
313
  return;
312
314
  }
@@ -397,6 +399,36 @@ const discoverConfigDirectories = async (options) => {
397
399
  level: dir.level
398
400
  }));
399
401
  };
402
+ const resolveContextDirectory = async (protokollDirPath, config) => {
403
+ const repoRoot = path.dirname(protokollDirPath);
404
+ if (config && typeof config.contextDirectory === "string") {
405
+ const explicitPath = path.isAbsolute(config.contextDirectory) ? config.contextDirectory : path.resolve(repoRoot, config.contextDirectory);
406
+ try {
407
+ const stat = await fs$1.stat(explicitPath);
408
+ if (stat.isDirectory()) {
409
+ return explicitPath;
410
+ }
411
+ } catch {
412
+ }
413
+ }
414
+ const rootContextDir = path.join(repoRoot, "context");
415
+ try {
416
+ const stat = await fs$1.stat(rootContextDir);
417
+ if (stat.isDirectory()) {
418
+ return rootContextDir;
419
+ }
420
+ } catch {
421
+ }
422
+ const legacyContextDir = path.join(protokollDirPath, "context");
423
+ try {
424
+ const stat = await fs$1.stat(legacyContextDir);
425
+ if (stat.isDirectory()) {
426
+ return legacyContextDir;
427
+ }
428
+ } catch {
429
+ }
430
+ return null;
431
+ };
400
432
  const loadHierarchicalConfig = async (options) => {
401
433
  const discovered = await discoverConfigDirectories(options);
402
434
  if (discovered.length === 0) {
@@ -411,21 +443,19 @@ const loadHierarchicalConfig = async (options) => {
411
443
  const contextDirs = [];
412
444
  for (const dir of sortedDirs) {
413
445
  const configPath = path.join(dir.path, options.configFileName);
446
+ let parsedConfig = null;
414
447
  try {
415
448
  const content = await fs$1.readFile(configPath, "utf-8");
416
449
  const parsed = yaml.load(content);
417
450
  if (parsed && typeof parsed === "object") {
418
- configs.push(parsed);
451
+ parsedConfig = parsed;
452
+ configs.push(parsedConfig);
419
453
  }
420
454
  } catch {
421
455
  }
422
- const contextDir = path.join(dir.path, "context");
423
- try {
424
- const stat = await fs$1.stat(contextDir);
425
- if (stat.isDirectory()) {
426
- contextDirs.push(contextDir);
427
- }
428
- } catch {
456
+ const contextDir = await resolveContextDirectory(dir.path, parsedConfig);
457
+ if (contextDir) {
458
+ contextDirs.push(contextDir);
429
459
  }
430
460
  }
431
461
  const mergedConfig = configs.reduce(
@@ -567,12 +597,12 @@ const create$u = async (options = {}) => {
567
597
  }
568
598
  return related.sort((a, b) => a.distance - b.distance).map((r) => r.project);
569
599
  },
570
- saveEntity: async (entity) => {
600
+ saveEntity: async (entity, allowUpdate = false) => {
571
601
  const closestDir = discoveryResult.discoveredDirs.sort((a, b) => a.level - b.level)[0];
572
602
  if (!closestDir) {
573
603
  throw new Error("No .protokoll directory found. Run with --init-config to create one.");
574
604
  }
575
- await storage.save(entity, closestDir.path);
605
+ await storage.save(entity, closestDir.path, allowUpdate);
576
606
  },
577
607
  deleteEntity: async (entity) => {
578
608
  const filePath = storage.getEntityFilePath(entity.type, entity.id, discoveryResult.contextDirs);
@@ -2968,14 +2998,18 @@ const create$g = (logger) => {
2968
2998
  throw new Error(`Failed to split audio file ${filePath}: ${error}`);
2969
2999
  }
2970
3000
  };
2971
- const convertToSupportedFormat = async (filePath, outputDir) => {
3001
+ const convertToSupportedFormat = async (filePath, outputDir, forceConversion = false) => {
2972
3002
  try {
2973
3003
  const fileExt = path__default.extname(filePath).toLowerCase();
2974
3004
  const supportedFormats = [".flac", ".m4a", ".mp3", ".mp4", ".mpeg", ".mpga", ".oga", ".ogg", ".wav", ".webm"];
2975
- if (supportedFormats.includes(fileExt)) {
3005
+ if (supportedFormats.includes(fileExt) && !forceConversion) {
2976
3006
  logger.debug(`File ${filePath} is already in a supported format: ${fileExt}`);
2977
3007
  return filePath;
2978
3008
  }
3009
+ if (forceConversion && fileExt === ".mp3") {
3010
+ logger.debug(`File ${filePath} is already MP3 (compressed format)`);
3011
+ return filePath;
3012
+ }
2979
3013
  logger.info(`Converting ${fileExt} file to mp3 for transcription...`);
2980
3014
  const fileName = path__default.basename(filePath, fileExt);
2981
3015
  const outputPath = path__default.join(outputDir, `${fileName}.mp3`);
@@ -3043,25 +3077,80 @@ const create$f = (openai) => {
3043
3077
  const transcribe = async (request) => {
3044
3078
  const { audioFile, config } = request;
3045
3079
  logger.debug("Starting transcription", { model: config.model, file: audioFile });
3080
+ const MAX_AUDIO_SIZE = 26214400;
3046
3081
  const tempDir = path.join(os.tmpdir(), "protokoll-conversions");
3047
- const convertedAudioFile = await media.convertToSupportedFormat(audioFile, tempDir);
3082
+ const originalFileSize = await media.getFileSize(audioFile);
3083
+ const originalFileSizeMB = (originalFileSize / (1024 * 1024)).toFixed(1);
3084
+ logger.debug(`Original audio file size: ${originalFileSize} bytes (${originalFileSizeMB} MB)`);
3085
+ const needsConversion = originalFileSize > MAX_AUDIO_SIZE * 0.95;
3086
+ const convertedAudioFile = needsConversion ? await media.convertToSupportedFormat(audioFile, tempDir, true) : await media.convertToSupportedFormat(audioFile, tempDir);
3048
3087
  logger.debug(`Using audio file for transcription: ${convertedAudioFile}`);
3049
- const audioStream = await storage.readStream(convertedAudioFile);
3050
- const startTime = Date.now();
3051
- const response = await openai.audio.transcriptions.create({
3052
- model: config.model,
3053
- file: audioStream,
3054
- response_format: config.response_format ?? "json",
3055
- ...config.language && { language: config.language },
3056
- ...config.temperature !== void 0 && { temperature: config.temperature },
3057
- ...config.prompt && { prompt: config.prompt }
3058
- });
3059
- const duration = Date.now() - startTime;
3060
- logger.debug("Transcription complete", { duration, model: config.model });
3088
+ const fileSize = await media.getFileSize(convertedAudioFile);
3089
+ const fileSizeMB = (fileSize / (1024 * 1024)).toFixed(1);
3090
+ logger.debug(`Audio file size: ${fileSize} bytes (${fileSizeMB} MB), max size: ${MAX_AUDIO_SIZE} bytes`);
3091
+ let transcriptionText;
3092
+ let totalDuration = 0;
3093
+ if (fileSize > MAX_AUDIO_SIZE) {
3094
+ logger.info(`Audio file exceeds maximum size (${fileSize} > ${MAX_AUDIO_SIZE} bytes), splitting into chunks`);
3095
+ const splitTempDir = path.join(tempDir, `split_audio_${Date.now()}`);
3096
+ await storage.createDirectory(splitTempDir);
3097
+ try {
3098
+ const audioChunks = await media.splitAudioFile(convertedAudioFile, splitTempDir, MAX_AUDIO_SIZE);
3099
+ logger.info(`Split audio file into ${audioChunks.length} chunks`);
3100
+ const transcriptions = [];
3101
+ for (let i = 0; i < audioChunks.length; i++) {
3102
+ const chunkPath = audioChunks[i];
3103
+ logger.info(`Transcribing chunk ${i + 1}/${audioChunks.length}: ${chunkPath}`);
3104
+ const chunkStream = await storage.readStream(chunkPath);
3105
+ const chunkStartTime = Date.now();
3106
+ const chunkResponse = await openai.audio.transcriptions.create({
3107
+ model: config.model,
3108
+ file: chunkStream,
3109
+ response_format: config.response_format ?? "json",
3110
+ ...config.language && { language: config.language },
3111
+ ...config.temperature !== void 0 && { temperature: config.temperature },
3112
+ ...config.prompt && { prompt: config.prompt }
3113
+ });
3114
+ const chunkDuration = Date.now() - chunkStartTime;
3115
+ totalDuration += chunkDuration;
3116
+ transcriptions.push(chunkResponse.text);
3117
+ }
3118
+ transcriptionText = transcriptions.join(" ");
3119
+ for (const chunk of audioChunks) {
3120
+ try {
3121
+ await storage.deleteFile(chunk);
3122
+ } catch (error) {
3123
+ logger.warn(`Failed to delete temporary chunk ${chunk}: ${error}`);
3124
+ }
3125
+ }
3126
+ try {
3127
+ await storage.deleteDirectory(splitTempDir);
3128
+ } catch (error) {
3129
+ logger.warn(`Failed to delete temporary split directory ${splitTempDir}: ${error}`);
3130
+ }
3131
+ } catch (error) {
3132
+ logger.error(`Error processing split audio files: ${error}`);
3133
+ throw new Error(`Failed to process split audio files: ${error}`);
3134
+ }
3135
+ } else {
3136
+ const audioStream = await storage.readStream(convertedAudioFile);
3137
+ const startTime = Date.now();
3138
+ const response = await openai.audio.transcriptions.create({
3139
+ model: config.model,
3140
+ file: audioStream,
3141
+ response_format: config.response_format ?? "json",
3142
+ ...config.language && { language: config.language },
3143
+ ...config.temperature !== void 0 && { temperature: config.temperature },
3144
+ ...config.prompt && { prompt: config.prompt }
3145
+ });
3146
+ totalDuration = Date.now() - startTime;
3147
+ transcriptionText = response.text;
3148
+ }
3149
+ logger.debug("Transcription complete", { duration: totalDuration, model: config.model });
3061
3150
  return {
3062
- text: response.text,
3151
+ text: transcriptionText,
3063
3152
  model: config.model,
3064
- duration
3153
+ duration: totalDuration
3065
3154
  };
3066
3155
  };
3067
3156
  return {
@@ -5524,7 +5613,9 @@ const combineTranscripts = async (filePaths, options = {}) => {
5524
5613
  });
5525
5614
  const firstTranscript = transcripts[0];
5526
5615
  const baseMetadata = { ...firstTranscript.metadata };
5527
- const context = await create$u();
5616
+ const context = await create$u({
5617
+ startingDir: options.contextDirectory || path.dirname(firstTranscript.filePath)
5618
+ });
5528
5619
  let targetProject;
5529
5620
  if (options.projectId) {
5530
5621
  targetProject = context.getProject(options.projectId);
@@ -5697,7 +5788,9 @@ const parseFilePaths = (input) => {
5697
5788
  };
5698
5789
  const editTranscript = async (filePath, options) => {
5699
5790
  const transcript = await parseTranscript(filePath);
5700
- const context = await create$u();
5791
+ const context = await create$u({
5792
+ startingDir: options.contextDirectory || path.dirname(filePath)
5793
+ });
5701
5794
  let targetProject;
5702
5795
  if (options.projectId) {
5703
5796
  targetProject = context.getProject(options.projectId);