@kadoa/node-sdk 0.0.2 → 0.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,8 +1,184 @@
1
+ import { EventEmitter } from 'events';
1
2
  import globalAxios2, { AxiosError } from 'axios';
3
+ import { merge } from 'es-toolkit/object';
2
4
  import { URL as URL$1, URLSearchParams } from 'url';
3
- import { EventEmitter } from 'events';
4
5
 
5
- // src/kadoa-sdk.ts
6
+ // src/events/index.ts
7
+ var KadoaEventEmitter = class extends EventEmitter {
8
+ /**
9
+ * Emit a typed SDK event
10
+ */
11
+ emit(eventName, payload, source = "sdk", metadata) {
12
+ const event = {
13
+ type: eventName,
14
+ timestamp: /* @__PURE__ */ new Date(),
15
+ source,
16
+ payload,
17
+ metadata
18
+ };
19
+ return super.emit("event", event);
20
+ }
21
+ /**
22
+ * Subscribe to SDK events
23
+ */
24
+ onEvent(listener) {
25
+ return super.on("event", listener);
26
+ }
27
+ /**
28
+ * Subscribe to SDK events (once)
29
+ */
30
+ onceEvent(listener) {
31
+ return super.once("event", listener);
32
+ }
33
+ /**
34
+ * Unsubscribe from SDK events
35
+ */
36
+ offEvent(listener) {
37
+ return super.off("event", listener);
38
+ }
39
+ /**
40
+ * Remove all event listeners
41
+ */
42
+ removeAllEventListeners() {
43
+ return super.removeAllListeners("event");
44
+ }
45
+ };
46
+
47
+ // src/exceptions/kadoa-sdk.exception.ts
48
+ var KadoaSdkException = class _KadoaSdkException extends Error {
49
+ constructor(message, options) {
50
+ super(message);
51
+ this.name = "KadoaSdkException";
52
+ this.code = options?.code ?? "UNKNOWN";
53
+ this.details = options?.details;
54
+ if (options && "cause" in options) this.cause = options.cause;
55
+ Error.captureStackTrace?.(this, _KadoaSdkException);
56
+ }
57
+ static from(error, details) {
58
+ if (error instanceof _KadoaSdkException) return error;
59
+ const message = error instanceof Error ? error.message : typeof error === "string" ? error : "Unexpected error";
60
+ return new _KadoaSdkException(message, {
61
+ code: "UNKNOWN",
62
+ details,
63
+ cause: error
64
+ });
65
+ }
66
+ toJSON() {
67
+ return {
68
+ name: this.name,
69
+ message: this.message,
70
+ code: this.code,
71
+ details: this.details
72
+ };
73
+ }
74
+ toString() {
75
+ return [this.name, this.code, this.message].filter(Boolean).join(": ");
76
+ }
77
+ };
78
+
79
+ // src/exceptions/http.exception.ts
80
+ var KadoaHttpException = class _KadoaHttpException extends KadoaSdkException {
81
+ constructor(message, options) {
82
+ super(message, {
83
+ code: options?.code,
84
+ details: options?.details,
85
+ cause: options?.cause
86
+ });
87
+ this.name = "KadoaHttpException";
88
+ this.httpStatus = options?.httpStatus;
89
+ this.requestId = options?.requestId;
90
+ this.endpoint = options?.endpoint;
91
+ this.method = options?.method;
92
+ this.responseBody = options?.responseBody;
93
+ }
94
+ static fromAxiosError(error, extra) {
95
+ const status = error.response?.status;
96
+ const requestId = error.response?.headers?.["x-request-id"] || error.response?.headers?.["x-amzn-requestid"];
97
+ const method = error.config?.method?.toUpperCase();
98
+ const url = error.config?.url;
99
+ return new _KadoaHttpException(extra?.message || error.message, {
100
+ code: _KadoaHttpException.mapStatusToCode(error),
101
+ httpStatus: status,
102
+ requestId,
103
+ endpoint: url,
104
+ method,
105
+ responseBody: error.response?.data,
106
+ details: extra?.details,
107
+ cause: error
108
+ });
109
+ }
110
+ toJSON() {
111
+ return {
112
+ ...super.toJSON(),
113
+ httpStatus: this.httpStatus,
114
+ requestId: this.requestId,
115
+ endpoint: this.endpoint,
116
+ method: this.method,
117
+ responseBody: this.responseBody
118
+ };
119
+ }
120
+ static mapStatusToCode(error) {
121
+ const status = error.response?.status;
122
+ if (!status) {
123
+ return error.code === "ECONNABORTED" ? "TIMEOUT" : error.request ? "NETWORK_ERROR" : "UNKNOWN";
124
+ }
125
+ if (status === 401 || status === 403) return "AUTH_ERROR";
126
+ if (status === 404) return "NOT_FOUND";
127
+ if (status === 408) return "TIMEOUT";
128
+ if (status === 429) return "RATE_LIMITED";
129
+ if (status >= 400 && status < 500) return "VALIDATION_ERROR";
130
+ if (status >= 500) return "HTTP_ERROR";
131
+ return "UNKNOWN";
132
+ }
133
+ };
134
+ function isKadoaSdkException(error) {
135
+ return error instanceof KadoaSdkException;
136
+ }
137
+ function isKadoaHttpException(error) {
138
+ return error instanceof KadoaHttpException;
139
+ }
140
+ function wrapKadoaError(error, extra) {
141
+ if (error instanceof AxiosError)
142
+ return KadoaHttpException.fromAxiosError(error, extra);
143
+ return KadoaSdkException.from(error, extra?.details);
144
+ }
145
+
146
+ // src/extraction/constants.ts
147
+ var DEFAULT_OPTIONS = {
148
+ pollingInterval: 5e3,
149
+ maxWaitTime: 3e5,
150
+ navigationMode: "single-page",
151
+ location: { type: "auto" },
152
+ name: "Untitled Workflow",
153
+ dataLimit: 100
154
+ };
155
+ var MAX_DATA_LIMIT = 99999;
156
+ var TERMINAL_RUN_STATES = /* @__PURE__ */ new Set([
157
+ "FINISHED",
158
+ "SUCCESS",
159
+ "FAILED",
160
+ "ERROR",
161
+ "STOPPED",
162
+ "CANCELLED"
163
+ ]);
164
+ var SUCCESSFUL_RUN_STATES = /* @__PURE__ */ new Set(["FINISHED", "SUCCESS"]);
165
+ var ENTITY_API_ENDPOINT = "/v4/entity";
166
+ var DEFAULT_API_BASE_URL = "https://api.kadoa.com";
167
+ var ERROR_MESSAGES = {
168
+ NO_URLS: "At least one URL is required for extraction",
169
+ NO_API_KEY: "API key is required for entity detection",
170
+ LINK_REQUIRED: "Link is required for entity field detection",
171
+ NO_WORKFLOW_ID: "Failed to start extraction process - no ID received",
172
+ NO_PREDICTIONS: "No entity predictions returned from the API",
173
+ PARSE_ERROR: "Failed to parse entity response",
174
+ NETWORK_ERROR: "Network error while fetching entity fields",
175
+ AUTH_FAILED: "Authentication failed. Please check your API key",
176
+ RATE_LIMITED: "Rate limit exceeded. Please try again later",
177
+ SERVER_ERROR: "Server error while fetching entity fields",
178
+ DATA_FETCH_FAILED: "Failed to retrieve extracted data from workflow",
179
+ PROGRESS_CHECK_FAILED: "Failed to check extraction progress",
180
+ EXTRACTION_FAILED: "Data extraction failed for the provided URLs"
181
+ };
6
182
  var BASE_PATH = "https://api.kadoa.com".replace(/\/+$/, "");
7
183
  var BaseAPI = class {
8
184
  constructor(configuration, basePath = BASE_PATH, axios2 = globalAxios2) {
@@ -1687,180 +1863,6 @@ var Configuration = class {
1687
1863
  return mime !== null && (jsonMime.test(mime) || mime.toLowerCase() === "application/json-patch+json");
1688
1864
  }
1689
1865
  };
1690
- var KadoaEventEmitter = class extends EventEmitter {
1691
- /**
1692
- * Emit a typed SDK event
1693
- */
1694
- emit(eventName, payload, source = "sdk", metadata) {
1695
- const event = {
1696
- type: eventName,
1697
- timestamp: /* @__PURE__ */ new Date(),
1698
- source,
1699
- payload,
1700
- metadata
1701
- };
1702
- return super.emit("event", event);
1703
- }
1704
- /**
1705
- * Subscribe to SDK events
1706
- */
1707
- onEvent(listener) {
1708
- return super.on("event", listener);
1709
- }
1710
- /**
1711
- * Subscribe to SDK events (once)
1712
- */
1713
- onceEvent(listener) {
1714
- return super.once("event", listener);
1715
- }
1716
- /**
1717
- * Unsubscribe from SDK events
1718
- */
1719
- offEvent(listener) {
1720
- return super.off("event", listener);
1721
- }
1722
- /**
1723
- * Remove all event listeners
1724
- */
1725
- removeAllEventListeners() {
1726
- return super.removeAllListeners("event");
1727
- }
1728
- };
1729
-
1730
- // src/kadoa-sdk.ts
1731
- function initializeSdk(config) {
1732
- const baseUrl = config.baseUrl || "https://api.kadoa.com";
1733
- const configParams = {
1734
- apiKey: config.apiKey,
1735
- basePath: baseUrl
1736
- };
1737
- const configuration = new Configuration(configParams);
1738
- const axiosInstance = globalAxios2.create({
1739
- timeout: config.timeout || 3e4
1740
- });
1741
- const events = new KadoaEventEmitter();
1742
- return {
1743
- configuration,
1744
- axiosInstance,
1745
- baseUrl,
1746
- events,
1747
- emit: (eventName, payload, source, metadata) => {
1748
- events.emit(eventName, payload, source, metadata);
1749
- },
1750
- onEvent: (listener) => {
1751
- events.onEvent(listener);
1752
- },
1753
- offEvent: (listener) => {
1754
- events.offEvent(listener);
1755
- }
1756
- };
1757
- }
1758
- function dispose(sdkInstance) {
1759
- if (sdkInstance?.events) {
1760
- sdkInstance.events.removeAllListeners();
1761
- }
1762
- }
1763
-
1764
- // src/exceptions/kadoa-sdk.exception.ts
1765
- var KadoaSdkException = class _KadoaSdkException extends Error {
1766
- constructor(message, options) {
1767
- super(message);
1768
- this.name = "KadoaSdkException";
1769
- this.code = options?.code ?? "UNKNOWN";
1770
- this.details = options?.details;
1771
- if (options && "cause" in options) this.cause = options.cause;
1772
- Error.captureStackTrace?.(this, _KadoaSdkException);
1773
- }
1774
- static from(error, details) {
1775
- if (error instanceof _KadoaSdkException) return error;
1776
- const message = error instanceof Error ? error.message : typeof error === "string" ? error : "Unexpected error";
1777
- return new _KadoaSdkException(message, {
1778
- code: "UNKNOWN",
1779
- details,
1780
- cause: error
1781
- });
1782
- }
1783
- toJSON() {
1784
- return {
1785
- name: this.name,
1786
- message: this.message,
1787
- code: this.code,
1788
- details: this.details
1789
- };
1790
- }
1791
- toString() {
1792
- return [this.name, this.code, this.message].filter(Boolean).join(": ");
1793
- }
1794
- };
1795
-
1796
- // src/exceptions/http.exception.ts
1797
- var KadoaHttpException = class _KadoaHttpException extends KadoaSdkException {
1798
- constructor(message, options) {
1799
- super(message, {
1800
- code: options?.code,
1801
- details: options?.details,
1802
- cause: options?.cause
1803
- });
1804
- this.name = "KadoaHttpException";
1805
- this.httpStatus = options?.httpStatus;
1806
- this.requestId = options?.requestId;
1807
- this.endpoint = options?.endpoint;
1808
- this.method = options?.method;
1809
- this.responseBody = options?.responseBody;
1810
- }
1811
- static fromAxiosError(error, extra) {
1812
- const status = error.response?.status;
1813
- const requestId = error.response?.headers?.["x-request-id"] || error.response?.headers?.["x-amzn-requestid"];
1814
- const method = error.config?.method?.toUpperCase();
1815
- const url = error.config?.url;
1816
- return new _KadoaHttpException(extra?.message || error.message, {
1817
- code: _KadoaHttpException.mapStatusToCode(error),
1818
- httpStatus: status,
1819
- requestId,
1820
- endpoint: url,
1821
- method,
1822
- responseBody: error.response?.data,
1823
- details: extra?.details,
1824
- cause: error
1825
- });
1826
- }
1827
- toJSON() {
1828
- return {
1829
- ...super.toJSON(),
1830
- httpStatus: this.httpStatus,
1831
- requestId: this.requestId,
1832
- endpoint: this.endpoint,
1833
- method: this.method,
1834
- responseBody: this.responseBody
1835
- };
1836
- }
1837
- static mapStatusToCode(error) {
1838
- const status = error.response?.status;
1839
- if (!status) {
1840
- return error.code === "ECONNABORTED" ? "TIMEOUT" : error.request ? "NETWORK_ERROR" : "UNKNOWN";
1841
- }
1842
- if (status === 401 || status === 403) return "AUTH_ERROR";
1843
- if (status === 404) return "NOT_FOUND";
1844
- if (status === 408) return "TIMEOUT";
1845
- if (status === 429) return "RATE_LIMITED";
1846
- if (status >= 400 && status < 500) return "VALIDATION_ERROR";
1847
- if (status >= 500) return "HTTP_ERROR";
1848
- return "UNKNOWN";
1849
- }
1850
- };
1851
-
1852
- // src/exceptions/utils.ts
1853
- function isKadoaSdkException(error) {
1854
- return error instanceof KadoaSdkException;
1855
- }
1856
- function isKadoaHttpException(error) {
1857
- return error instanceof KadoaHttpException;
1858
- }
1859
- function wrapKadoaError(error, extra) {
1860
- if (error instanceof AxiosError)
1861
- return KadoaHttpException.fromAxiosError(error, extra);
1862
- return KadoaSdkException.from(error, extra?.details);
1863
- }
1864
1866
 
1865
1867
  // src/api-client.ts
1866
1868
  var workflowsApiCache = /* @__PURE__ */ new WeakMap();
@@ -1873,20 +1875,37 @@ function getWorkflowsApi(app) {
1873
1875
  return api;
1874
1876
  }
1875
1877
 
1876
- // src/extraction/detect-entity.ts
1877
- async function fetchEntityFields(app, options) {
1878
+ // src/extraction/data-fetcher.ts
1879
+ async function fetchWorkflowData(sdkInstance, workflowId, limit = DEFAULT_OPTIONS.dataLimit) {
1880
+ const workflowsApi = getWorkflowsApi(sdkInstance);
1881
+ try {
1882
+ const response = await workflowsApi.v4WorkflowsWorkflowIdDataGet({
1883
+ workflowId,
1884
+ limit
1885
+ });
1886
+ return response.data.data ?? [];
1887
+ } catch (error) {
1888
+ throw wrapKadoaError(error, {
1889
+ message: ERROR_MESSAGES.DATA_FETCH_FAILED,
1890
+ details: { workflowId, limit }
1891
+ });
1892
+ }
1893
+ }
1894
+
1895
+ // src/extraction/entity-detector.ts
1896
+ function validateEntityOptions(options) {
1878
1897
  if (!options.link) {
1879
- throw new KadoaSdkException("Link is required for entity field detection", {
1898
+ throw new KadoaSdkException(ERROR_MESSAGES.LINK_REQUIRED, {
1880
1899
  code: "VALIDATION_ERROR",
1881
1900
  details: { options }
1882
1901
  });
1883
1902
  }
1884
- const url = new URL("/v4/entity", app.baseUrl || "https://api.kadoa.com");
1903
+ }
1904
+ async function buildRequestHeaders(config) {
1885
1905
  const headers = {
1886
1906
  "Content-Type": "application/json",
1887
1907
  Accept: "application/json"
1888
1908
  };
1889
- const config = app.configuration;
1890
1909
  if (config?.apiKey) {
1891
1910
  if (typeof config.apiKey === "function") {
1892
1911
  const apiKeyValue = await config.apiKey("X-API-Key");
@@ -1897,16 +1916,71 @@ async function fetchEntityFields(app, options) {
1897
1916
  headers["X-API-Key"] = config.apiKey;
1898
1917
  }
1899
1918
  } else {
1900
- throw new KadoaSdkException("API key is required for entity detection", {
1919
+ throw new KadoaSdkException(ERROR_MESSAGES.NO_API_KEY, {
1901
1920
  code: "AUTH_ERROR",
1902
1921
  details: { hasConfig: !!config, hasApiKey: !!config?.apiKey }
1903
1922
  });
1904
1923
  }
1905
- const requestBody = {
1906
- link: options.link,
1907
- location: options.location || { type: "auto" },
1908
- navigationMode: options.navigationMode || "single-page"
1924
+ return headers;
1925
+ }
1926
+ function getErrorCodeFromStatus(status) {
1927
+ if (status === 401 || status === 403) return "AUTH_ERROR";
1928
+ if (status === 404) return "NOT_FOUND";
1929
+ if (status === 429) return "RATE_LIMITED";
1930
+ if (status >= 400 && status < 500) return "VALIDATION_ERROR";
1931
+ if (status >= 500) return "HTTP_ERROR";
1932
+ return "UNKNOWN";
1933
+ }
1934
+ async function handleErrorResponse(response, url, link) {
1935
+ let errorData;
1936
+ let errorText = "";
1937
+ try {
1938
+ errorText = await response.text();
1939
+ errorData = JSON.parse(errorText);
1940
+ } catch {
1941
+ errorData = { message: errorText || response.statusText };
1942
+ }
1943
+ const baseErrorOptions = {
1944
+ httpStatus: response.status,
1945
+ endpoint: url.toString(),
1946
+ method: "POST",
1947
+ responseBody: errorData,
1948
+ details: {
1949
+ url: url.toString(),
1950
+ link
1951
+ }
1909
1952
  };
1953
+ if (response.status === 401) {
1954
+ throw new KadoaHttpException(ERROR_MESSAGES.AUTH_FAILED, {
1955
+ ...baseErrorOptions,
1956
+ code: "AUTH_ERROR"
1957
+ });
1958
+ }
1959
+ if (response.status === 429) {
1960
+ throw new KadoaHttpException(ERROR_MESSAGES.RATE_LIMITED, {
1961
+ ...baseErrorOptions,
1962
+ code: "RATE_LIMITED"
1963
+ });
1964
+ }
1965
+ if (response.status >= 500) {
1966
+ throw new KadoaHttpException(ERROR_MESSAGES.SERVER_ERROR, {
1967
+ ...baseErrorOptions,
1968
+ code: "HTTP_ERROR"
1969
+ });
1970
+ }
1971
+ throw new KadoaHttpException(
1972
+ `Failed to fetch entity fields: ${errorData?.message || response.statusText}`,
1973
+ {
1974
+ ...baseErrorOptions,
1975
+ code: getErrorCodeFromStatus(response.status)
1976
+ }
1977
+ );
1978
+ }
1979
+ async function fetchEntityFields(app, options) {
1980
+ validateEntityOptions(options);
1981
+ const url = new URL(ENTITY_API_ENDPOINT, app.baseUrl || DEFAULT_API_BASE_URL);
1982
+ const headers = await buildRequestHeaders(app.configuration);
1983
+ const requestBody = options;
1910
1984
  let response;
1911
1985
  try {
1912
1986
  response = await fetch(url.toString(), {
@@ -1915,7 +1989,7 @@ async function fetchEntityFields(app, options) {
1915
1989
  body: JSON.stringify(requestBody)
1916
1990
  });
1917
1991
  } catch (error) {
1918
- throw new KadoaSdkException("Network error while fetching entity fields", {
1992
+ throw new KadoaSdkException(ERROR_MESSAGES.NETWORK_ERROR, {
1919
1993
  code: "NETWORK_ERROR",
1920
1994
  details: {
1921
1995
  url: url.toString(),
@@ -1925,89 +1999,13 @@ async function fetchEntityFields(app, options) {
1925
1999
  });
1926
2000
  }
1927
2001
  if (!response.ok) {
1928
- let errorData;
1929
- let errorText = "";
1930
- try {
1931
- errorText = await response.text();
1932
- errorData = JSON.parse(errorText);
1933
- } catch {
1934
- errorData = { message: errorText || response.statusText };
1935
- }
1936
- const getErrorCode = (status) => {
1937
- if (status === 401 || status === 403) return "AUTH_ERROR";
1938
- if (status === 404) return "NOT_FOUND";
1939
- if (status === 429) return "RATE_LIMITED";
1940
- if (status >= 400 && status < 500) return "VALIDATION_ERROR";
1941
- if (status >= 500) return "HTTP_ERROR";
1942
- return "UNKNOWN";
1943
- };
1944
- if (response.status === 401) {
1945
- throw new KadoaHttpException(
1946
- "Authentication failed. Please check your API key",
1947
- {
1948
- code: "AUTH_ERROR",
1949
- httpStatus: response.status,
1950
- endpoint: url.toString(),
1951
- method: "POST",
1952
- responseBody: errorData,
1953
- details: {
1954
- url: url.toString(),
1955
- link: options.link
1956
- }
1957
- }
1958
- );
1959
- } else if (response.status === 429) {
1960
- throw new KadoaHttpException(
1961
- "Rate limit exceeded. Please try again later",
1962
- {
1963
- code: "RATE_LIMITED",
1964
- httpStatus: response.status,
1965
- endpoint: url.toString(),
1966
- method: "POST",
1967
- responseBody: errorData,
1968
- details: {
1969
- url: url.toString(),
1970
- link: options.link
1971
- }
1972
- }
1973
- );
1974
- } else if (response.status >= 500) {
1975
- throw new KadoaHttpException(
1976
- "Server error while fetching entity fields",
1977
- {
1978
- code: "HTTP_ERROR",
1979
- httpStatus: response.status,
1980
- endpoint: url.toString(),
1981
- method: "POST",
1982
- responseBody: errorData,
1983
- details: {
1984
- url: url.toString(),
1985
- link: options.link
1986
- }
1987
- }
1988
- );
1989
- } else {
1990
- throw new KadoaHttpException(
1991
- `Failed to fetch entity fields: ${errorData?.message || response.statusText}`,
1992
- {
1993
- code: getErrorCode(response.status),
1994
- httpStatus: response.status,
1995
- endpoint: url.toString(),
1996
- method: "POST",
1997
- responseBody: errorData,
1998
- details: {
1999
- url: url.toString(),
2000
- link: options.link
2001
- }
2002
- }
2003
- );
2004
- }
2002
+ await handleErrorResponse(response, url, options.link);
2005
2003
  }
2006
2004
  let data;
2007
2005
  try {
2008
2006
  data = await response.json();
2009
2007
  } catch (error) {
2010
- throw new KadoaSdkException("Failed to parse entity response", {
2008
+ throw new KadoaSdkException(ERROR_MESSAGES.PARSE_ERROR, {
2011
2009
  code: "INTERNAL_ERROR",
2012
2010
  details: {
2013
2011
  url: url.toString(),
@@ -2017,7 +2015,7 @@ async function fetchEntityFields(app, options) {
2017
2015
  });
2018
2016
  }
2019
2017
  if (!data.success || !data.entityPrediction || data.entityPrediction.length === 0) {
2020
- throw new KadoaSdkException("No entity predictions returned from the API", {
2018
+ throw new KadoaSdkException(ERROR_MESSAGES.NO_PREDICTIONS, {
2021
2019
  code: "NOT_FOUND",
2022
2020
  details: {
2023
2021
  success: data.success,
@@ -2030,150 +2028,148 @@ async function fetchEntityFields(app, options) {
2030
2028
  return data.entityPrediction[0];
2031
2029
  }
2032
2030
 
2033
- // src/extraction/run-extraction.ts
2034
- var TERMINAL_RUN_STATES = /* @__PURE__ */ new Set([
2035
- "FINISHED",
2036
- "SUCCESS",
2037
- "FAILED",
2038
- "ERROR",
2039
- "STOPPED",
2040
- "CANCELLED"
2041
- ]);
2031
+ // src/extraction/workflow-manager.ts
2042
2032
  function isTerminalRunState(runState) {
2043
2033
  if (!runState) return false;
2044
2034
  return TERMINAL_RUN_STATES.has(runState.toUpperCase());
2045
2035
  }
2046
- async function fetchWorkflowData(sdkInstance, workflowId, limit = 100) {
2036
+ async function createWorkflow(sdkInstance, config) {
2047
2037
  const workflowsApi = getWorkflowsApi(sdkInstance);
2038
+ const request = {
2039
+ urls: config.urls,
2040
+ navigationMode: config.navigationMode,
2041
+ entity: config.entity,
2042
+ name: config.name,
2043
+ fields: config.fields,
2044
+ bypassPreview: true,
2045
+ limit: MAX_DATA_LIMIT,
2046
+ tags: ["sdk"]
2047
+ };
2048
2048
  try {
2049
- const response = await workflowsApi.v4WorkflowsWorkflowIdDataGet({
2050
- workflowId,
2051
- limit
2049
+ const response = await workflowsApi.v4WorkflowsPost({
2050
+ v4WorkflowsPostRequest: request
2052
2051
  });
2053
- return response.data.data ?? [];
2052
+ const workflowId = response.data.workflowId;
2053
+ if (!workflowId) {
2054
+ throw new KadoaSdkException(ERROR_MESSAGES.NO_WORKFLOW_ID, {
2055
+ code: "INTERNAL_ERROR",
2056
+ details: { urls: config.urls }
2057
+ });
2058
+ }
2059
+ return workflowId;
2054
2060
  } catch (error) {
2055
- const e = wrapKadoaError(error, {
2056
- message: "Failed to retrieve extracted data from workflow",
2057
- details: { workflowId, limit }
2061
+ throw wrapKadoaError(error, {
2062
+ message: "Failed to create workflow",
2063
+ details: config
2058
2064
  });
2059
- throw e;
2060
2065
  }
2061
2066
  }
2062
- async function waitForWorkflowCompletion(sdkInstance, workflowId, pollingInterval = 5e3, maxWaitTime = 3e5) {
2067
+ async function getWorkflowStatus(sdkInstance, workflowId) {
2063
2068
  const workflowsApi = getWorkflowsApi(sdkInstance);
2069
+ try {
2070
+ const response = await workflowsApi.v4WorkflowsWorkflowIdGet({
2071
+ workflowId
2072
+ });
2073
+ return response.data;
2074
+ } catch (error) {
2075
+ throw wrapKadoaError(error, {
2076
+ message: ERROR_MESSAGES.PROGRESS_CHECK_FAILED,
2077
+ details: { workflowId }
2078
+ });
2079
+ }
2080
+ }
2081
+ async function waitForWorkflowCompletion(sdkInstance, workflowId, options) {
2082
+ const pollingInterval = options.pollingInterval;
2083
+ const maxWaitTime = options.maxWaitTime;
2064
2084
  const startTime = Date.now();
2065
2085
  let previousState;
2066
2086
  let previousRunState;
2067
2087
  while (Date.now() - startTime < maxWaitTime) {
2068
- try {
2069
- const response = await workflowsApi.v4WorkflowsWorkflowIdGet({
2070
- workflowId
2071
- });
2072
- const workflow = response.data;
2073
- if (workflow.state !== previousState || workflow.runState !== previousRunState) {
2074
- sdkInstance.emit(
2075
- "extraction:status_changed",
2076
- {
2077
- workflowId,
2078
- previousState,
2079
- previousRunState,
2080
- currentState: workflow.state,
2081
- currentRunState: workflow.runState
2082
- },
2083
- "extraction"
2084
- );
2085
- previousState = workflow.state;
2086
- previousRunState = workflow.runState;
2087
- }
2088
- if (isTerminalRunState(workflow.runState)) {
2089
- return workflow;
2088
+ const workflow = await getWorkflowStatus(sdkInstance, workflowId);
2089
+ if (workflow.state !== previousState || workflow.runState !== previousRunState) {
2090
+ const statusChange = {
2091
+ workflowId,
2092
+ previousState,
2093
+ previousRunState,
2094
+ currentState: workflow.state,
2095
+ currentRunState: workflow.runState
2096
+ };
2097
+ sdkInstance.emit("extraction:status_changed", statusChange, "extraction");
2098
+ if (options?.onStatusChange) {
2099
+ options.onStatusChange(statusChange);
2090
2100
  }
2091
- await new Promise((resolve) => setTimeout(resolve, pollingInterval));
2092
- } catch (error) {
2093
- const e = wrapKadoaError(error, {
2094
- message: "Failed to check extraction progress",
2095
- details: { workflowId }
2096
- });
2097
- throw e;
2101
+ previousState = workflow.state;
2102
+ previousRunState = workflow.runState;
2098
2103
  }
2104
+ if (isTerminalRunState(workflow.runState)) {
2105
+ return workflow;
2106
+ }
2107
+ await new Promise((resolve) => setTimeout(resolve, pollingInterval));
2099
2108
  }
2100
- const timeoutError = new KadoaSdkException(
2109
+ throw new KadoaSdkException(
2101
2110
  `Extraction did not complete within ${maxWaitTime / 1e3} seconds`,
2102
2111
  { code: "TIMEOUT", details: { workflowId, maxWaitTime } }
2103
2112
  );
2104
- throw timeoutError;
2105
2113
  }
2106
- async function runExtraction(sdkInstance, options) {
2114
+
2115
+ // src/extraction/extraction-runner.ts
2116
+ function validateExtractionOptions(options) {
2107
2117
  if (!options.urls || options.urls.length === 0) {
2108
- const e = new KadoaSdkException(
2109
- "At least one URL is required for extraction",
2110
- {
2111
- code: "VALIDATION_ERROR"
2112
- }
2113
- );
2114
- throw e;
2118
+ throw new KadoaSdkException(ERROR_MESSAGES.NO_URLS, {
2119
+ code: "VALIDATION_ERROR"
2120
+ });
2115
2121
  }
2122
+ }
2123
+ function isExtractionSuccessful(runState) {
2124
+ return runState ? SUCCESSFUL_RUN_STATES.has(runState.toUpperCase()) : false;
2125
+ }
2126
+ async function runExtraction(sdkInstance, options) {
2127
+ validateExtractionOptions(options);
2128
+ const config = merge(
2129
+ DEFAULT_OPTIONS,
2130
+ options
2131
+ );
2116
2132
  try {
2117
2133
  const entityPrediction = await fetchEntityFields(sdkInstance, {
2118
- link: options.urls[0],
2119
- location: options.location || { type: "auto" },
2120
- navigationMode: options.navigationMode || "single-page"
2134
+ link: config.urls[0],
2135
+ location: config.location,
2136
+ navigationMode: config.navigationMode
2121
2137
  });
2122
2138
  sdkInstance.emit(
2123
2139
  "entity:detected",
2124
2140
  {
2125
2141
  entity: entityPrediction.entity,
2126
2142
  fields: entityPrediction.fields,
2127
- url: options.urls[0]
2143
+ url: config.urls[0]
2128
2144
  },
2129
2145
  "extraction",
2130
2146
  {
2131
- navigationMode: options.navigationMode,
2132
- location: options.location
2147
+ navigationMode: config.navigationMode,
2148
+ location: config.location
2133
2149
  }
2134
2150
  );
2135
- const response = await getWorkflowsApi(sdkInstance).v4WorkflowsPost({
2136
- v4WorkflowsPostRequest: {
2137
- urls: options.urls,
2138
- navigationMode: options.navigationMode || "single-page",
2139
- entity: entityPrediction.entity,
2140
- name: options.name || "Untitled Workflow",
2141
- fields: entityPrediction.fields,
2142
- bypassPreview: true,
2143
- limit: 99999,
2144
- // no limits for SDK
2145
- tags: ["sdk"]
2146
- }
2151
+ const workflowId = await createWorkflow(sdkInstance, {
2152
+ urls: config.urls,
2153
+ navigationMode: config.navigationMode,
2154
+ entity: entityPrediction.entity,
2155
+ fields: entityPrediction.fields,
2156
+ name: config.name
2147
2157
  });
2148
- const workflowId = response.data.workflowId;
2149
- if (!workflowId) {
2150
- const e = new KadoaSdkException(
2151
- "Failed to start extraction process - no ID received",
2152
- {
2153
- code: "INTERNAL_ERROR",
2154
- details: { urls: options.urls }
2155
- }
2156
- );
2157
- throw e;
2158
- }
2159
2158
  sdkInstance.emit(
2160
2159
  "extraction:started",
2161
2160
  {
2162
2161
  workflowId,
2163
- name: options.name || "Untitled Workflow",
2164
- urls: options.urls
2162
+ name: config.name,
2163
+ urls: config.urls
2165
2164
  },
2166
2165
  "extraction"
2167
2166
  );
2168
- const workflow = await waitForWorkflowCompletion(
2169
- sdkInstance,
2170
- workflowId,
2171
- options.pollingInterval,
2172
- options.maxWaitTime
2173
- );
2167
+ const workflow = await waitForWorkflowCompletion(sdkInstance, workflowId, {
2168
+ pollingInterval: config.pollingInterval,
2169
+ maxWaitTime: config.maxWaitTime
2170
+ });
2174
2171
  let data;
2175
- const successfulRunStates = ["FINISHED", "SUCCESS"];
2176
- const isSuccess = workflow.runState && successfulRunStates.includes(workflow.runState.toUpperCase());
2172
+ const isSuccess = isExtractionSuccessful(workflow.runState);
2177
2173
  if (isSuccess) {
2178
2174
  data = await fetchWorkflowData(sdkInstance, workflowId);
2179
2175
  if (data) {
@@ -2210,7 +2206,7 @@ async function runExtraction(sdkInstance, options) {
2210
2206
  },
2211
2207
  "extraction"
2212
2208
  );
2213
- const e = new KadoaSdkException(
2209
+ throw new KadoaSdkException(
2214
2210
  `Extraction completed with unexpected status: ${workflow.runState}`,
2215
2211
  {
2216
2212
  code: "INTERNAL_ERROR",
@@ -2221,7 +2217,6 @@ async function runExtraction(sdkInstance, options) {
2221
2217
  }
2222
2218
  }
2223
2219
  );
2224
- throw e;
2225
2220
  }
2226
2221
  return {
2227
2222
  workflowId,
@@ -2229,11 +2224,42 @@ async function runExtraction(sdkInstance, options) {
2229
2224
  data
2230
2225
  };
2231
2226
  } catch (error) {
2232
- const e = wrapKadoaError(error, {
2233
- message: "Data extraction failed for the provided URLs",
2227
+ throw wrapKadoaError(error, {
2228
+ message: ERROR_MESSAGES.EXTRACTION_FAILED,
2234
2229
  details: { urls: options.urls }
2235
2230
  });
2236
- throw e;
2231
+ }
2232
+ }
2233
+ function initializeSdk(config) {
2234
+ const baseUrl = config.baseUrl || "https://api.kadoa.com";
2235
+ const configParams = {
2236
+ apiKey: config.apiKey,
2237
+ basePath: baseUrl
2238
+ };
2239
+ const configuration = new Configuration(configParams);
2240
+ const axiosInstance = globalAxios2.create({
2241
+ timeout: config.timeout || 3e4
2242
+ });
2243
+ const events = new KadoaEventEmitter();
2244
+ return {
2245
+ configuration,
2246
+ axiosInstance,
2247
+ baseUrl,
2248
+ events,
2249
+ emit: (eventName, payload, source, metadata) => {
2250
+ events.emit(eventName, payload, source, metadata);
2251
+ },
2252
+ onEvent: (listener) => {
2253
+ events.onEvent(listener);
2254
+ },
2255
+ offEvent: (listener) => {
2256
+ events.offEvent(listener);
2257
+ }
2258
+ };
2259
+ }
2260
+ function dispose(sdkInstance) {
2261
+ if (sdkInstance?.events) {
2262
+ sdkInstance.events.removeAllListeners();
2237
2263
  }
2238
2264
  }
2239
2265