@promptbook/markitdown 0.88.0-1 → 0.88.0-11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -116,6 +116,14 @@ Promptbook project is ecosystem of multiple projects and tools, following is a l
116
116
  </tbody>
117
117
  </table>
118
118
 
119
+ Hello world examples:
120
+
121
+ - [Hello world](https://github.com/webgptorg/hello-world)
122
+ - [Hello world in Node.js](https://github.com/webgptorg/hello-world-node-js)
123
+ - [Hello world in Next.js](https://github.com/webgptorg/hello-world-next-js)
124
+
125
+
126
+
119
127
  We also have a community of developers and users of **Promptbook**:
120
128
 
121
129
  - [Discord community](https://discord.gg/x3QWNaa89N)
@@ -282,16 +290,9 @@ Or you can install them separately:
282
290
 
283
291
  ## 📚 Dictionary
284
292
 
285
-
286
-
287
-
288
-
289
-
290
- ### 📚 Dictionary
291
-
292
293
  The following glossary is used to clarify certain concepts:
293
294
 
294
- #### General LLM / AI terms
295
+ ### General LLM / AI terms
295
296
 
296
297
  - **Prompt drift** is a phenomenon where the AI model starts to generate outputs that are not aligned with the original prompt. This can happen due to the model's training data, the prompt's wording, or the model's architecture.
297
298
  - **Pipeline, workflow or chain** is a sequence of tasks that are executed in a specific order. In the context of AI, a pipeline can refer to a sequence of AI models that are used to process data.
@@ -304,11 +305,11 @@ The following glossary is used to clarify certain concepts:
304
305
 
305
306
 
306
307
 
307
- _Note: Thos section is not complete dictionary, more list of general AI / LLM terms that has connection with Promptbook_
308
+ _Note: This section is not a complete dictionary, but rather a list of general AI / LLM terms that have a connection with Promptbook_
308
309
 
309
310
 
310
311
 
311
- #### 💯 Core concepts
312
+ ### 💯 Core concepts
312
313
 
313
314
  - [📚 Collection of pipelines](https://github.com/webgptorg/promptbook/discussions/65)
314
315
  - [📯 Pipeline](https://github.com/webgptorg/promptbook/discussions/64)
@@ -321,7 +322,7 @@ _Note: Thos section is not complete dictionary, more list of general AI / LLM te
321
322
  - [🔣 Words not tokens](https://github.com/webgptorg/promptbook/discussions/29)
322
323
  - [☯ Separation of concerns](https://github.com/webgptorg/promptbook/discussions/32)
323
324
 
324
- ##### Advanced concepts
325
+ #### Advanced concepts
325
326
 
326
327
  - [📚 Knowledge (Retrieval-augmented generation)](https://github.com/webgptorg/promptbook/discussions/41)
327
328
  - [🌏 Remote server](https://github.com/webgptorg/promptbook/discussions/89)
@@ -338,17 +339,9 @@ _Note: Thos section is not complete dictionary, more list of general AI / LLM te
338
339
 
339
340
 
340
341
 
341
- ### Terms specific to Promptbook TypeScript implementation
342
-
343
- - Anonymous mode
344
- - Application mode
345
-
342
+ ## 🚂 Promptbook Engine
346
343
 
347
-
348
- ## 🔌 Usage in Typescript / Javascript
349
-
350
- - [Simple usage](./examples/usage/simple-script)
351
- - [Usage with client and remote server](./examples/usage/remote)
344
+ ![Schema of Promptbook Engine](./documents/promptbook-engine.svg)
352
345
 
353
346
  ## ➕➖ When to use Promptbook?
354
347
 
@@ -414,13 +407,13 @@ See [TODO.md](./TODO.md)
414
407
  <div style="display: flex; align-items: center; gap: 20px;">
415
408
 
416
409
  <a href="https://promptbook.studio/">
417
- <img src="./design/promptbook-studio-logo.png" alt="Partner 3" height="100">
410
+ <img src="./design/promptbook-studio-logo.png" alt="Partner 3" height="70">
418
411
  </a>
419
412
 
420
413
  <a href="https://technologickainkubace.org/en/about-technology-incubation/about-the-project/">
421
- <img src="./other/partners/CI-Technology-Incubation.png" alt="Technology Incubation" height="100">
414
+ <img src="./other/partners/CI-Technology-Incubation.png" alt="Technology Incubation" height="70">
422
415
  </a>
423
-
416
+
424
417
  </div>
425
418
 
426
419
  ## 🖋️ Contributing
package/esm/index.es.js CHANGED
@@ -5,7 +5,7 @@ import hexEncoder from 'crypto-js/enc-hex';
5
5
  import { basename, join, dirname } from 'path';
6
6
  import { format } from 'prettier';
7
7
  import parserHtml from 'prettier/parser-html';
8
- import { BehaviorSubject } from 'rxjs';
8
+ import { Subject } from 'rxjs';
9
9
  import { randomBytes } from 'crypto';
10
10
  import { forTime } from 'waitasecond';
11
11
  import sha256 from 'crypto-js/sha256';
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.88.0-1';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.88.0-11';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -2068,6 +2068,36 @@ function $randomToken(randomness) {
2068
2068
  * TODO: Maybe use nanoid instead https://github.com/ai/nanoid
2069
2069
  */
2070
2070
 
2071
+ /**
2072
+ * Recursively converts JSON strings to JSON objects
2073
+
2074
+ * @public exported from `@promptbook/utils`
2075
+ */
2076
+ function jsonStringsToJsons(object) {
2077
+ if (object === null) {
2078
+ return object;
2079
+ }
2080
+ if (Array.isArray(object)) {
2081
+ return object.map(jsonStringsToJsons);
2082
+ }
2083
+ if (typeof object !== 'object') {
2084
+ return object;
2085
+ }
2086
+ const newObject = { ...object };
2087
+ for (const [key, value] of Object.entries(object)) {
2088
+ if (typeof value === 'string' && isValidJsonString(value)) {
2089
+ newObject[key] = JSON.parse(value);
2090
+ }
2091
+ else {
2092
+ newObject[key] = jsonStringsToJsons(value);
2093
+ }
2094
+ }
2095
+ return newObject;
2096
+ }
2097
+ /**
2098
+ * TODO: Type the return type correctly
2099
+ */
2100
+
2071
2101
  /**
2072
2102
  * This error indicates problems parsing the format value
2073
2103
  *
@@ -2294,21 +2324,43 @@ function assertsTaskSuccessful(executionResult) {
2294
2324
  function createTask(options) {
2295
2325
  const { taskType, taskProcessCallback } = options;
2296
2326
  const taskId = `${taskType.toLowerCase().substring(0, 4)}-${$randomToken(8 /* <- TODO: To global config + Use Base58 to avoid simmilar char conflicts */)}`;
2297
- const partialResultSubject = new BehaviorSubject({});
2327
+ let status = 'RUNNING';
2328
+ const createdAt = new Date();
2329
+ let updatedAt = createdAt;
2330
+ const errors = [];
2331
+ const warnings = [];
2332
+ let currentValue = {};
2333
+ const partialResultSubject = new Subject();
2334
+ // <- Note: Not using `BehaviorSubject` because on error we can't access the last value
2298
2335
  const finalResultPromise = /* not await */ taskProcessCallback((newOngoingResult) => {
2336
+ Object.assign(currentValue, newOngoingResult);
2337
+ // <- TODO: assign deep
2299
2338
  partialResultSubject.next(newOngoingResult);
2300
2339
  });
2301
2340
  finalResultPromise
2302
2341
  .catch((error) => {
2342
+ errors.push(error);
2303
2343
  partialResultSubject.error(error);
2304
2344
  })
2305
- .then((value) => {
2306
- if (value) {
2345
+ .then((executionResult) => {
2346
+ if (executionResult) {
2307
2347
  try {
2308
- assertsTaskSuccessful(value);
2309
- partialResultSubject.next(value);
2348
+ updatedAt = new Date();
2349
+ errors.push(...executionResult.errors);
2350
+ warnings.push(...executionResult.warnings);
2351
+ // <- TODO: !!! Only unique errors and warnings should be added (or filtered)
2352
+ // TODO: [🧠] !!! errors, warning, isSuccessful are redundant both in `ExecutionTask` and `ExecutionTask.currentValue`
2353
+ // Also maybe move `ExecutionTask.currentValue.usage` -> `ExecutionTask.usage`
2354
+ // And delete `ExecutionTask.currentValue.preparedPipeline`
2355
+ assertsTaskSuccessful(executionResult);
2356
+ status = 'FINISHED';
2357
+ currentValue = jsonStringsToJsons(executionResult);
2358
+ // <- TODO: [🧠] Is it a good idea to convert JSON strings to JSONs?
2359
+ partialResultSubject.next(executionResult);
2310
2360
  }
2311
2361
  catch (error) {
2362
+ status = 'ERROR';
2363
+ errors.push(error);
2312
2364
  partialResultSubject.error(error);
2313
2365
  }
2314
2366
  }
@@ -2325,12 +2377,33 @@ function createTask(options) {
2325
2377
  return {
2326
2378
  taskType,
2327
2379
  taskId,
2380
+ get status() {
2381
+ return status;
2382
+ // <- Note: [1] These must be getters to allow changing the value in the future
2383
+ },
2384
+ get createdAt() {
2385
+ return createdAt;
2386
+ // <- Note: [1]
2387
+ },
2388
+ get updatedAt() {
2389
+ return updatedAt;
2390
+ // <- Note: [1]
2391
+ },
2328
2392
  asPromise,
2329
2393
  asObservable() {
2330
2394
  return partialResultSubject.asObservable();
2331
2395
  },
2396
+ get errors() {
2397
+ return errors;
2398
+ // <- Note: [1]
2399
+ },
2400
+ get warnings() {
2401
+ return warnings;
2402
+ // <- Note: [1]
2403
+ },
2332
2404
  get currentValue() {
2333
- return partialResultSubject.value;
2405
+ return currentValue;
2406
+ // <- Note: [1]
2334
2407
  },
2335
2408
  };
2336
2409
  }
@@ -4705,7 +4778,7 @@ async function executeAttempts(options) {
4705
4778
  Last result:
4706
4779
  ${block($ongoingTaskResult.$resultString === null
4707
4780
  ? 'null'
4708
- : $ongoingTaskResult.$resultString
4781
+ : spaceTrim$1($ongoingTaskResult.$resultString)
4709
4782
  .split('\n')
4710
4783
  .map((line) => `> ${line}`)
4711
4784
  .join('\n'))}