@wabot-dev/framework 0.1.0-beta.9 → 0.2.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/dist/src/addon/async/pg/PgJobRepository.js +26 -0
  2. package/dist/src/addon/auth/api-key/@apiKeyConnectionGuard.js +16 -0
  3. package/dist/src/addon/auth/api-key/@apiKeyGuard.js +17 -0
  4. package/dist/src/addon/auth/api-key/ApiKey.js +45 -0
  5. package/dist/src/addon/auth/api-key/ApiKeyConnectionGuardMiddleware.js +57 -0
  6. package/dist/src/addon/auth/api-key/ApiKeyGuardMiddleware.js +45 -0
  7. package/dist/src/addon/auth/api-key/ApiKeyRepository.js +22 -0
  8. package/dist/src/addon/auth/api-key/PgApiKeyRepository.js +53 -0
  9. package/dist/src/addon/auth/api-key/RemoteApiKeyRepository.js +62 -0
  10. package/dist/src/addon/auth/jwt/@jwtConnectionGuard.js +16 -0
  11. package/dist/src/addon/auth/jwt/@jwtGuard.js +17 -0
  12. package/dist/src/addon/auth/jwt/Jwt.js +53 -0
  13. package/dist/src/addon/auth/jwt/JwtAccessAndRefreshTokenDto.js +20 -0
  14. package/dist/src/addon/auth/jwt/JwtConfig.js +28 -0
  15. package/dist/src/addon/auth/jwt/JwtConnectionGuardMiddleware.js +57 -0
  16. package/dist/src/addon/auth/jwt/JwtGuardMiddleware.js +45 -0
  17. package/dist/src/addon/auth/jwt/JwtRefreshToken.js +56 -0
  18. package/dist/src/addon/auth/jwt/JwtRefreshTokenRepository.js +25 -0
  19. package/dist/src/addon/auth/jwt/JwtSigner.js +36 -0
  20. package/dist/src/addon/auth/jwt/JwtTokenDto.js +22 -0
  21. package/dist/src/addon/auth/jwt/PgJwtRefreshTokenRepository.js +21 -0
  22. package/dist/src/addon/chat-bot/anthropic/AnthropicChatAdapter.js +135 -0
  23. package/dist/src/addon/chat-bot/deepseek/DeepSeekChatAdapter.js +137 -0
  24. package/dist/src/addon/chat-bot/google/GoogleChatAdapter.js +128 -0
  25. package/dist/src/addon/chat-bot/openia/OpenaiChatAdapter.js +117 -0
  26. package/dist/src/{pre-made/repository/chat → addon/chat-bot}/pg/PgChatMemory.js +6 -4
  27. package/dist/src/{pre-made/repository/chat → addon/chat-bot}/pg/PgChatRepository.js +6 -5
  28. package/dist/src/{pre-made/repository/chat → addon/chat-bot}/ram/RamChatRepository.js +2 -2
  29. package/dist/src/addon/chat-bot/wabot/WabotChatAdapter.js +41 -0
  30. package/dist/src/addon/chat-controller/cmd/@cmd.js +24 -0
  31. package/dist/src/addon/chat-controller/cmd/CmdChannel.js +91 -0
  32. package/dist/src/{channels → addon/chat-controller}/socket/@socket.js +10 -4
  33. package/dist/src/{channels → addon/chat-controller}/socket/SocketChannel.js +9 -23
  34. package/dist/src/{channels → addon/chat-controller}/socket/SocketChannelConfig.js +1 -1
  35. package/dist/src/{channels → addon/chat-controller}/telegram/@telegram.js +10 -4
  36. package/dist/src/{channels → addon/chat-controller}/telegram/TelegramChannel.js +4 -22
  37. package/dist/src/addon/chat-controller/whatsapp/@whatsApp.js +26 -0
  38. package/dist/src/{channels → addon/chat-controller}/whatsapp/EnvWhatsAppRepository.js +3 -3
  39. package/dist/src/{channels → addon/chat-controller}/whatsapp/PgWhatsAppRepository.js +2 -2
  40. package/dist/src/{channels → addon/chat-controller}/whatsapp/WhatsApp.js +2 -4
  41. package/dist/src/{channels → addon/chat-controller}/whatsapp/WhatsAppChannel.js +6 -18
  42. package/dist/src/addon/chat-controller/whatsapp/WhatsAppReceiver.js +10 -0
  43. package/dist/src/{channels → addon/chat-controller}/whatsapp/WhatsAppSender.js +15 -39
  44. package/dist/src/addon/chat-controller/whatsapp/cloud-api/WhatsAppReceiverByCloudApi.js +97 -0
  45. package/dist/src/{channels/whatsapp → addon/chat-controller/whatsapp/cloud-api}/WhatsAppSenderByCloudApi.js +27 -20
  46. package/dist/src/addon/chat-controller/whatsapp/proxy/WhatsAppProxyContracts.js +5 -0
  47. package/dist/src/addon/chat-controller/whatsapp/proxy/WhatsAppReceiverByWabotProxy.js +65 -0
  48. package/dist/src/addon/chat-controller/whatsapp/proxy/WhatsAppSenderByWabotProxy.js +61 -0
  49. package/dist/src/addon/chat-controller/whatsapp/proxy/WhatsAppWabotProxyConnection.js +45 -0
  50. package/dist/src/{pre-made/module → addon/mindset}/html/HtmlModule.js +7 -7
  51. package/dist/src/core/auth/Auth.js +33 -0
  52. package/dist/src/core/{Persistent.js → entity/Entity.js} +24 -12
  53. package/dist/src/core/env/Env.js +39 -0
  54. package/dist/src/core/error/CustomError.js +15 -0
  55. package/dist/src/core/injection/index.js +4 -0
  56. package/dist/src/core/mapper/Mapper.js +42 -0
  57. package/dist/src/core/password/Password.js +30 -0
  58. package/dist/src/core/random/Random.js +65 -0
  59. package/dist/src/core/storable/Storable.js +8 -0
  60. package/dist/src/core/validation/core/validateArray.js +51 -0
  61. package/dist/src/core/validation/core/validateModel.js +36 -0
  62. package/dist/src/{validation/metadata/@isNumber.js → core/validation/metadata/@isArray.js} +5 -4
  63. package/dist/src/{validation/metadata/@isDate.js → core/validation/metadata/@isModel.js} +5 -4
  64. package/dist/src/{validation → core/validation}/metadata/@isOptional.js +1 -1
  65. package/dist/src/core/validation/metadata/ValidationMetadataStore.js +98 -0
  66. package/dist/src/core/validation/modelInfo.js +9 -0
  67. package/dist/src/{validation/validateModel2.js → core/validation/validate.js} +3 -3
  68. package/dist/src/{validation/metadata → core/validation/validators/is-boolean}/@isBoolean.js +3 -3
  69. package/dist/src/core/validation/validators/is-date/@isDate.js +17 -0
  70. package/dist/src/core/validation/validators/is-in/@isIn.js +18 -0
  71. package/dist/src/core/validation/validators/is-in/validateIsIn.js +12 -0
  72. package/dist/src/{validation/metadata → core/validation/validators/is-not-empty}/@isNotEmpty.js +3 -3
  73. package/dist/src/core/validation/validators/is-number/@isNumber.js +17 -0
  74. package/dist/src/{validation/metadata → core/validation/validators/is-present}/@isPresent.js +3 -3
  75. package/dist/src/{validation/metadata → core/validation/validators/is-string}/@isString.js +3 -3
  76. package/dist/src/{validation/metadata → core/validation/validators/max}/@max.js +3 -3
  77. package/dist/src/{validation/metadata → core/validation/validators/min}/@min.js +3 -3
  78. package/dist/src/feature/async/@command.js +11 -0
  79. package/dist/src/feature/async/@commandHandler.js +12 -0
  80. package/dist/src/feature/async/Async.js +38 -0
  81. package/dist/src/feature/async/Command.js +9 -0
  82. package/dist/src/feature/async/CommandMetadataStore.js +38 -0
  83. package/dist/src/feature/async/Job.js +27 -0
  84. package/dist/src/feature/async/JobRepository.js +31 -0
  85. package/dist/src/feature/async/JobRunner.js +48 -0
  86. package/dist/src/feature/async/JobsEventsHub.js +36 -0
  87. package/dist/src/feature/async/runCommandHandlers.js +29 -0
  88. package/dist/src/{core/chat → feature/chat-bot}/Chat.js +2 -2
  89. package/dist/src/feature/chat-bot/ChatAdapter.js +7 -0
  90. package/dist/src/feature/chat-bot/ChatBot.js +73 -0
  91. package/dist/src/feature/chat-bot/ChatItem.js +24 -0
  92. package/dist/src/feature/chat-bot/ChatMemory.js +10 -0
  93. package/dist/src/{core/chat/repository/IChatRepository.js → feature/chat-bot/ChatRepository.js} +2 -8
  94. package/dist/src/feature/chat-bot/IChatItem.js +3 -0
  95. package/dist/src/{chatbot → feature/chat-bot}/metadata/@chatBot.js +1 -1
  96. package/dist/src/{chatbot → feature/chat-bot}/metadata/ChatBotMetadataStore.js +1 -1
  97. package/dist/src/{controller/channel → feature/chat-controller}/ChatResolver.js +6 -4
  98. package/dist/src/{controller → feature/chat-controller}/metadata/ControllerMetadataStore.js +1 -1
  99. package/dist/src/{controller → feature/chat-controller}/metadata/controller/@chatController.js +1 -1
  100. package/dist/src/feature/chat-controller/runChatControllers.js +83 -0
  101. package/dist/src/{channels → feature}/express/ExpressProvider.js +2 -4
  102. package/dist/src/{channels → feature}/http/HttpServerProvider.js +2 -2
  103. package/dist/src/{mindset → feature/mindset}/IMindset.js +6 -0
  104. package/dist/src/feature/mindset/MindsetOperator.js +180 -0
  105. package/dist/src/{mindset → feature/mindset}/metadata/MindsetMetadataStore.js +1 -1
  106. package/dist/src/{mindset → feature/mindset}/metadata/functions/@mindsetFunction.js +1 -1
  107. package/dist/src/{mindset → feature/mindset}/metadata/mindsets/@mindset.js +1 -1
  108. package/dist/src/{mindset → feature/mindset}/metadata/modules/@mindsetModule.js +1 -2
  109. package/dist/src/{mindset → feature/mindset}/metadata/params/@param.js +1 -1
  110. package/dist/src/feature/money/Money.js +61 -0
  111. package/dist/src/feature/money/MoneyDto.js +22 -0
  112. package/dist/src/{repository → feature}/pg/PgCrudRepository.js +24 -10
  113. package/dist/src/{repository → feature}/pg/PgRepositoryBase.js +2 -2
  114. package/dist/src/feature/rest-controller/injection-tokens.js +4 -0
  115. package/dist/src/{rest-controller/metadata/@post.js → feature/rest-controller/metadata/@middleware.js} +5 -8
  116. package/dist/src/feature/rest-controller/metadata/@onDelete.js +7 -0
  117. package/dist/src/feature/rest-controller/metadata/@onGet.js +7 -0
  118. package/dist/src/feature/rest-controller/metadata/@onPost.js +7 -0
  119. package/dist/src/feature/rest-controller/metadata/@onPut.js +7 -0
  120. package/dist/src/{rest-controller → feature/rest-controller}/metadata/@restController.js +2 -2
  121. package/dist/src/{rest-controller → feature/rest-controller}/metadata/RestControllerMetadataStore.js +14 -1
  122. package/dist/src/{rest-controller/metadata/@get.js → feature/rest-controller/metadata/methodDecorator.js} +5 -5
  123. package/dist/src/feature/rest-controller/runRestControllers.js +103 -0
  124. package/dist/src/{channels → feature}/socket/SocketServerProvider.js +2 -2
  125. package/dist/src/feature/socket-controller/metadata/@connectionMiddleware.js +16 -0
  126. package/dist/src/feature/socket-controller/metadata/@socketConnection.js +18 -0
  127. package/dist/src/feature/socket-controller/metadata/@socketController.js +15 -0
  128. package/dist/src/feature/socket-controller/metadata/@socketEvent.js +18 -0
  129. package/dist/src/feature/socket-controller/metadata/SocketControllerMetadataStore.js +65 -0
  130. package/dist/src/feature/socket-controller/runSocketControllers.js +99 -0
  131. package/dist/src/index.d.ts +1219 -718
  132. package/dist/src/index.js +147 -102
  133. package/package.json +8 -2
  134. package/dist/src/_virtual/_commonjsHelpers.js +0 -5
  135. package/dist/src/_virtual/cjs.js +0 -7
  136. package/dist/src/ai/deepseek/DeepSeekChatBotAdapter.js +0 -107
  137. package/dist/src/ai/openia/OpenaiChatBotAdapter.js +0 -88
  138. package/dist/src/channels/cmd/@cmd.js +0 -18
  139. package/dist/src/channels/cmd/CmdChannel.js +0 -73
  140. package/dist/src/channels/wabot/WabotDevConnection.js +0 -57
  141. package/dist/src/channels/wabot/WabotDevSocketContracts.js +0 -10
  142. package/dist/src/channels/whatsapp/@whatsapp.js +0 -20
  143. package/dist/src/channels/whatsapp/WhatsAppReceiver.js +0 -59
  144. package/dist/src/channels/whatsapp/WhatsAppReceiverByDevConnection.js +0 -32
  145. package/dist/src/channels/whatsapp/WhatsAppReceiverByWebHook.js +0 -63
  146. package/dist/src/channels/whatsapp/WhatsAppSenderByDevConnection.js +0 -61
  147. package/dist/src/chatbot/ChatBot.js +0 -51
  148. package/dist/src/chatbot/ChatBotAdapter.js +0 -72
  149. package/dist/src/controller/channel/UserResolver.js +0 -21
  150. package/dist/src/core/IMessageContext.js +0 -12
  151. package/dist/src/core/chat/ChatItem.js +0 -15
  152. package/dist/src/core/chat/repository/IChatMemory.js +0 -10
  153. package/dist/src/core/user/IUserRepository.js +0 -19
  154. package/dist/src/core/user/User.js +0 -26
  155. package/dist/src/env/WabotEnv.js +0 -27
  156. package/dist/src/injection/index.js +0 -4
  157. package/dist/src/mindset/MindsetOperator.js +0 -101
  158. package/dist/src/node_modules/@selderee/plugin-htmlparser2/lib/hp2-builder.js +0 -90
  159. package/dist/src/node_modules/deepmerge/dist/cjs.js +0 -142
  160. package/dist/src/node_modules/dom-serializer/lib/esm/foreignNames.js +0 -102
  161. package/dist/src/node_modules/dom-serializer/lib/esm/index.js +0 -186
  162. package/dist/src/node_modules/domelementtype/lib/esm/index.js +0 -53
  163. package/dist/src/node_modules/domhandler/lib/esm/index.js +0 -148
  164. package/dist/src/node_modules/domhandler/lib/esm/node.js +0 -334
  165. package/dist/src/node_modules/entities/lib/esm/decode.js +0 -458
  166. package/dist/src/node_modules/entities/lib/esm/decode_codepoint.js +0 -62
  167. package/dist/src/node_modules/entities/lib/esm/escape.js +0 -99
  168. package/dist/src/node_modules/entities/lib/esm/generated/decode-data-html.js +0 -8
  169. package/dist/src/node_modules/entities/lib/esm/generated/decode-data-xml.js +0 -8
  170. package/dist/src/node_modules/html-to-text/lib/html-to-text.js +0 -2147
  171. package/dist/src/node_modules/htmlparser2/lib/esm/Parser.js +0 -491
  172. package/dist/src/node_modules/htmlparser2/lib/esm/Tokenizer.js +0 -928
  173. package/dist/src/node_modules/htmlparser2/lib/esm/index.js +0 -18
  174. package/dist/src/node_modules/leac/lib/leac.js +0 -3
  175. package/dist/src/node_modules/parseley/lib/parseley.js +0 -270
  176. package/dist/src/node_modules/peberminta/lib/core.js +0 -171
  177. package/dist/src/node_modules/selderee/lib/selderee.js +0 -380
  178. package/dist/src/pre-made/module/authentication/AuthenticationModule.js +0 -97
  179. package/dist/src/pre-made/module/authentication/requests/SendOneTimePasswordRequest.js +0 -25
  180. package/dist/src/pre-made/module/authentication/requests/ValidateOneTimePasswordRequest.js +0 -25
  181. package/dist/src/pre-made/module/register-user/RegisterUserModule.js +0 -56
  182. package/dist/src/pre-made/module/register-user/requests/RegisterUserWithEmailRequest.js +0 -25
  183. package/dist/src/pre-made/repository/user/pg/PgUserRepository.js +0 -33
  184. package/dist/src/pre-made/repository/user/ram/RamUserRepository.js +0 -27
  185. package/dist/src/pre-made/service/EmailService.js +0 -13
  186. package/dist/src/pre-made/service/OtpService.js +0 -14
  187. package/dist/src/rest-controller/runRestControllers.js +0 -74
  188. package/dist/src/server/prepareChatContainer.js +0 -43
  189. package/dist/src/server/runChannel.js +0 -27
  190. package/dist/src/server/runServer.js +0 -40
  191. package/dist/src/validation/metadata/@validable.js +0 -14
  192. package/dist/src/validation/metadata/ValidationMetadataStore.js +0 -55
  193. package/dist/src/validation/validators/validateModel.js +0 -47
  194. /package/dist/src/{pre-made/repository/chat → addon/chat-bot}/ram/RamChatMemory.js +0 -0
  195. /package/dist/src/{channels → addon/chat-controller}/telegram/TelegramChannelConfig.js +0 -0
  196. /package/dist/src/{channels → addon/chat-controller}/whatsapp/WhatsAppChannelConfig.js +0 -0
  197. /package/dist/src/{channels → addon/chat-controller}/whatsapp/WhatsAppRepository.js +0 -0
  198. /package/dist/src/{injection → core/injection}/Container.js +0 -0
  199. /package/dist/src/{logger → core/logger}/Logger.js +0 -0
  200. /package/dist/src/{validation/validators → core/validation/core}/validateIsOptional.js +0 -0
  201. /package/dist/src/{validation/validators → core/validation/validators/is-boolean}/validateIsBoolean.js +0 -0
  202. /package/dist/src/{validation/validators → core/validation/validators/is-date}/validateIsDate.js +0 -0
  203. /package/dist/src/{validation/validators → core/validation/validators/is-not-empty}/validateIsNotEmpty.js +0 -0
  204. /package/dist/src/{validation/validators → core/validation/validators/is-number}/validateIsNumber.js +0 -0
  205. /package/dist/src/{validation/validators → core/validation/validators/is-present}/validateIsPresent.js +0 -0
  206. /package/dist/src/{validation/validators → core/validation/validators/is-string}/validateIsString.js +0 -0
  207. /package/dist/src/{validation/validators → core/validation/validators/max}/validateMax.js +0 -0
  208. /package/dist/src/{validation/validators → core/validation/validators/min}/validateMin.js +0 -0
  209. /package/dist/src/{mindset → feature/mindset}/metadata/functions/decoratorNames.js +0 -0
  210. /package/dist/src/{mindset → feature/mindset}/metadata/mindsets/decoratorNames.js +0 -0
  211. /package/dist/src/{mindset → feature/mindset}/metadata/modules/decoratorNames.js +0 -0
  212. /package/dist/src/{mindset → feature/mindset}/metadata/params/decoratorNames.js +0 -0
@@ -1,2147 +0,0 @@
1
- import { hp2Builder } from '../../@selderee/plugin-htmlparser2/lib/hp2-builder.js';
2
- import { parseDocument } from '../../htmlparser2/lib/esm/index.js';
3
- import { DecisionTree } from '../../selderee/lib/selderee.js';
4
- import merge from '../../../_virtual/cjs.js';
5
- import { render } from '../../dom-serializer/lib/esm/index.js';
6
-
7
- /**
8
- * Make a recursive function that will only run to a given depth
9
- * and switches to an alternative function at that depth. \
10
- * No limitation if `n` is `undefined` (Just wraps `f` in that case).
11
- *
12
- * @param { number | undefined } n Allowed depth of recursion. `undefined` for no limitation.
13
- * @param { Function } f Function that accepts recursive callback as the first argument.
14
- * @param { Function } [g] Function to run instead, when maximum depth was reached. Do nothing by default.
15
- * @returns { Function }
16
- */
17
- function limitedDepthRecursive (n, f, g = () => undefined) {
18
- if (n === undefined) {
19
- const f1 = function (...args) { return f(f1, ...args); };
20
- return f1;
21
- }
22
- if (n >= 0) {
23
- return function (...args) { return f(limitedDepthRecursive(n - 1, f, g), ...args); };
24
- }
25
- return g;
26
- }
27
-
28
- /**
29
- * Return the same string or a substring with
30
- * the given character occurrences removed from each side.
31
- *
32
- * @param { string } str A string to trim.
33
- * @param { string } char A character to be trimmed.
34
- * @returns { string }
35
- */
36
- function trimCharacter (str, char) {
37
- let start = 0;
38
- let end = str.length;
39
- while (start < end && str[start] === char) { ++start; }
40
- while (end > start && str[end - 1] === char) { --end; }
41
- return (start > 0 || end < str.length)
42
- ? str.substring(start, end)
43
- : str;
44
- }
45
-
46
- /**
47
- * Return the same string or a substring with
48
- * the given character occurrences removed from the end only.
49
- *
50
- * @param { string } str A string to trim.
51
- * @param { string } char A character to be trimmed.
52
- * @returns { string }
53
- */
54
- function trimCharacterEnd (str, char) {
55
- let end = str.length;
56
- while (end > 0 && str[end - 1] === char) { --end; }
57
- return (end < str.length)
58
- ? str.substring(0, end)
59
- : str;
60
- }
61
-
62
- /**
63
- * Return a new string will all characters replaced with unicode escape sequences.
64
- * This extreme kind of escaping can used to be safely compose regular expressions.
65
- *
66
- * @param { string } str A string to escape.
67
- * @returns { string } A string of unicode escape sequences.
68
- */
69
- function unicodeEscape (str) {
70
- return str.replace(/[\s\S]/g, c => '\\u' + c.charCodeAt().toString(16).padStart(4, '0'));
71
- }
72
-
73
- /**
74
- * Deduplicate an array by a given key callback.
75
- * Item properties are merged recursively and with the preference for last defined values.
76
- * Of items with the same key, merged item takes the place of the last item,
77
- * others are omitted.
78
- *
79
- * @param { any[] } items An array to deduplicate.
80
- * @param { (x: any) => string } getKey Callback to get a value that distinguishes unique items.
81
- * @returns { any[] }
82
- */
83
- function mergeDuplicatesPreferLast (items, getKey) {
84
- const map = new Map();
85
- for (let i = items.length; i-- > 0;) {
86
- const item = items[i];
87
- const key = getKey(item);
88
- map.set(
89
- key,
90
- (map.has(key))
91
- ? merge(item, map.get(key), { arrayMerge: overwriteMerge$1 })
92
- : item
93
- );
94
- }
95
- return [...map.values()].reverse();
96
- }
97
-
98
- const overwriteMerge$1 = (acc, src, options) => [...src];
99
-
100
- /**
101
- * Get a nested property from an object.
102
- *
103
- * @param { object } obj The object to query for the value.
104
- * @param { string[] } path The path to the property.
105
- * @returns { any }
106
- */
107
- function get (obj, path) {
108
- for (const key of path) {
109
- if (!obj) { return undefined; }
110
- obj = obj[key];
111
- }
112
- return obj;
113
- }
114
-
115
- /**
116
- * Convert a number into alphabetic sequence representation (Sequence without zeroes).
117
- *
118
- * For example: `a, ..., z, aa, ..., zz, aaa, ...`.
119
- *
120
- * @param { number } num Number to convert. Must be >= 1.
121
- * @param { string } [baseChar = 'a'] Character for 1 in the sequence.
122
- * @param { number } [base = 26] Number of characters in the sequence.
123
- * @returns { string }
124
- */
125
- function numberToLetterSequence (num, baseChar = 'a', base = 26) {
126
- const digits = [];
127
- do {
128
- num -= 1;
129
- digits.push(num % base);
130
- num = (num / base) >> 0; // quick `floor`
131
- } while (num > 0);
132
- const baseCode = baseChar.charCodeAt(0);
133
- return digits
134
- .reverse()
135
- .map(n => String.fromCharCode(baseCode + n))
136
- .join('');
137
- }
138
-
139
- const I = ['I', 'X', 'C', 'M'];
140
- const V = ['V', 'L', 'D'];
141
-
142
- /**
143
- * Convert a number to it's Roman representation. No large numbers extension.
144
- *
145
- * @param { number } num Number to convert. `0 < num <= 3999`.
146
- * @returns { string }
147
- */
148
- function numberToRoman (num) {
149
- return [...(num) + '']
150
- .map(n => +n)
151
- .reverse()
152
- .map((v, i) => ((v % 5 < 4)
153
- ? (v < 5 ? '' : V[i]) + I[i].repeat(v % 5)
154
- : I[i] + (v < 5 ? V[i] : I[i + 1])))
155
- .reverse()
156
- .join('');
157
- }
158
-
159
- /**
160
- * Helps to build text from words.
161
- */
162
- class InlineTextBuilder {
163
- /**
164
- * Creates an instance of InlineTextBuilder.
165
- *
166
- * If `maxLineLength` is not provided then it is either `options.wordwrap` or unlimited.
167
- *
168
- * @param { Options } options HtmlToText options.
169
- * @param { number } [ maxLineLength ] This builder will try to wrap text to fit this line length.
170
- */
171
- constructor (options, maxLineLength = undefined) {
172
- /** @type { string[][] } */
173
- this.lines = [];
174
- /** @type { string[] } */
175
- this.nextLineWords = [];
176
- this.maxLineLength = maxLineLength || options.wordwrap || Number.MAX_VALUE;
177
- this.nextLineAvailableChars = this.maxLineLength;
178
- this.wrapCharacters = get(options, ['longWordSplit', 'wrapCharacters']) || [];
179
- this.forceWrapOnLimit = get(options, ['longWordSplit', 'forceWrapOnLimit']) || false;
180
-
181
- this.stashedSpace = false;
182
- this.wordBreakOpportunity = false;
183
- }
184
-
185
- /**
186
- * Add a new word.
187
- *
188
- * @param { string } word A word to add.
189
- * @param { boolean } [noWrap] Don't wrap text even if the line is too long.
190
- */
191
- pushWord (word, noWrap = false) {
192
- if (this.nextLineAvailableChars <= 0 && !noWrap) {
193
- this.startNewLine();
194
- }
195
- const isLineStart = this.nextLineWords.length === 0;
196
- const cost = word.length + (isLineStart ? 0 : 1);
197
- if ((cost <= this.nextLineAvailableChars) || noWrap) { // Fits into available budget
198
-
199
- this.nextLineWords.push(word);
200
- this.nextLineAvailableChars -= cost;
201
-
202
- } else { // Does not fit - try to split the word
203
-
204
- // The word is moved to a new line - prefer to wrap between words.
205
- const [first, ...rest] = this.splitLongWord(word);
206
- if (!isLineStart) { this.startNewLine(); }
207
- this.nextLineWords.push(first);
208
- this.nextLineAvailableChars -= first.length;
209
- for (const part of rest) {
210
- this.startNewLine();
211
- this.nextLineWords.push(part);
212
- this.nextLineAvailableChars -= part.length;
213
- }
214
-
215
- }
216
- }
217
-
218
- /**
219
- * Pop a word from the currently built line.
220
- * This doesn't affect completed lines.
221
- *
222
- * @returns { string }
223
- */
224
- popWord () {
225
- const lastWord = this.nextLineWords.pop();
226
- if (lastWord !== undefined) {
227
- const isLineStart = this.nextLineWords.length === 0;
228
- const cost = lastWord.length + (isLineStart ? 0 : 1);
229
- this.nextLineAvailableChars += cost;
230
- }
231
- return lastWord;
232
- }
233
-
234
- /**
235
- * Concat a word to the last word already in the builder.
236
- * Adds a new word in case there are no words yet in the last line.
237
- *
238
- * @param { string } word A word to be concatenated.
239
- * @param { boolean } [noWrap] Don't wrap text even if the line is too long.
240
- */
241
- concatWord (word, noWrap = false) {
242
- if (this.wordBreakOpportunity && word.length > this.nextLineAvailableChars) {
243
- this.pushWord(word, noWrap);
244
- this.wordBreakOpportunity = false;
245
- } else {
246
- const lastWord = this.popWord();
247
- this.pushWord((lastWord) ? lastWord.concat(word) : word, noWrap);
248
- }
249
- }
250
-
251
- /**
252
- * Add current line (and more empty lines if provided argument > 1) to the list of complete lines and start a new one.
253
- *
254
- * @param { number } n Number of line breaks that will be added to the resulting string.
255
- */
256
- startNewLine (n = 1) {
257
- this.lines.push(this.nextLineWords);
258
- if (n > 1) {
259
- this.lines.push(...Array.from({ length: n - 1 }, () => []));
260
- }
261
- this.nextLineWords = [];
262
- this.nextLineAvailableChars = this.maxLineLength;
263
- }
264
-
265
- /**
266
- * No words in this builder.
267
- *
268
- * @returns { boolean }
269
- */
270
- isEmpty () {
271
- return this.lines.length === 0
272
- && this.nextLineWords.length === 0;
273
- }
274
-
275
- clear () {
276
- this.lines.length = 0;
277
- this.nextLineWords.length = 0;
278
- this.nextLineAvailableChars = this.maxLineLength;
279
- }
280
-
281
- /**
282
- * Join all lines of words inside the InlineTextBuilder into a complete string.
283
- *
284
- * @returns { string }
285
- */
286
- toString () {
287
- return [...this.lines, this.nextLineWords]
288
- .map(words => words.join(' '))
289
- .join('\n');
290
- }
291
-
292
- /**
293
- * Split a long word up to fit within the word wrap limit.
294
- * Use either a character to split looking back from the word wrap limit,
295
- * or truncate to the word wrap limit.
296
- *
297
- * @param { string } word Input word.
298
- * @returns { string[] } Parts of the word.
299
- */
300
- splitLongWord (word) {
301
- const parts = [];
302
- let idx = 0;
303
- while (word.length > this.maxLineLength) {
304
-
305
- const firstLine = word.substring(0, this.maxLineLength);
306
- const remainingChars = word.substring(this.maxLineLength);
307
-
308
- const splitIndex = firstLine.lastIndexOf(this.wrapCharacters[idx]);
309
-
310
- if (splitIndex > -1) { // Found a character to split on
311
-
312
- word = firstLine.substring(splitIndex + 1) + remainingChars;
313
- parts.push(firstLine.substring(0, splitIndex + 1));
314
-
315
- } else { // Not found a character to split on
316
-
317
- idx++;
318
- if (idx < this.wrapCharacters.length) { // There is next character to try
319
-
320
- word = firstLine + remainingChars;
321
-
322
- } else { // No more characters to try
323
-
324
- if (this.forceWrapOnLimit) {
325
- parts.push(firstLine);
326
- word = remainingChars;
327
- if (word.length > this.maxLineLength) {
328
- continue;
329
- }
330
- } else {
331
- word = firstLine + remainingChars;
332
- }
333
- break;
334
-
335
- }
336
-
337
- }
338
-
339
- }
340
- parts.push(word); // Add remaining part to array
341
- return parts;
342
- }
343
- }
344
-
345
- /* eslint-disable max-classes-per-file */
346
-
347
-
348
- class StackItem {
349
- constructor (next = null) { this.next = next; }
350
-
351
- getRoot () { return (this.next) ? this.next : this; }
352
- }
353
-
354
- class BlockStackItem extends StackItem {
355
- constructor (options, next = null, leadingLineBreaks = 1, maxLineLength = undefined) {
356
- super(next);
357
- this.leadingLineBreaks = leadingLineBreaks;
358
- this.inlineTextBuilder = new InlineTextBuilder(options, maxLineLength);
359
- this.rawText = '';
360
- this.stashedLineBreaks = 0;
361
- this.isPre = next && next.isPre;
362
- this.isNoWrap = next && next.isNoWrap;
363
- }
364
- }
365
-
366
- class ListStackItem extends BlockStackItem {
367
- constructor (
368
- options,
369
- next = null,
370
- {
371
- interRowLineBreaks = 1,
372
- leadingLineBreaks = 2,
373
- maxLineLength = undefined,
374
- maxPrefixLength = 0,
375
- prefixAlign = 'left',
376
- } = {}
377
- ) {
378
- super(options, next, leadingLineBreaks, maxLineLength);
379
- this.maxPrefixLength = maxPrefixLength;
380
- this.prefixAlign = prefixAlign;
381
- this.interRowLineBreaks = interRowLineBreaks;
382
- }
383
- }
384
-
385
- class ListItemStackItem extends BlockStackItem {
386
- constructor (
387
- options,
388
- next = null,
389
- {
390
- leadingLineBreaks = 1,
391
- maxLineLength = undefined,
392
- prefix = '',
393
- } = {}
394
- ) {
395
- super(options, next, leadingLineBreaks, maxLineLength);
396
- this.prefix = prefix;
397
- }
398
- }
399
-
400
- class TableStackItem extends StackItem {
401
- constructor (next = null) {
402
- super(next);
403
- this.rows = [];
404
- this.isPre = next && next.isPre;
405
- this.isNoWrap = next && next.isNoWrap;
406
- }
407
- }
408
-
409
- class TableRowStackItem extends StackItem {
410
- constructor (next = null) {
411
- super(next);
412
- this.cells = [];
413
- this.isPre = next && next.isPre;
414
- this.isNoWrap = next && next.isNoWrap;
415
- }
416
- }
417
-
418
- class TableCellStackItem extends StackItem {
419
- constructor (options, next = null, maxColumnWidth = undefined) {
420
- super(next);
421
- this.inlineTextBuilder = new InlineTextBuilder(options, maxColumnWidth);
422
- this.rawText = '';
423
- this.stashedLineBreaks = 0;
424
- this.isPre = next && next.isPre;
425
- this.isNoWrap = next && next.isNoWrap;
426
- }
427
- }
428
-
429
- class TransformerStackItem extends StackItem {
430
- constructor (next = null, transform) {
431
- super(next);
432
- this.transform = transform;
433
- }
434
- }
435
-
436
- function charactersToCodes (str) {
437
- return [...str]
438
- .map(c => '\\u' + c.charCodeAt(0).toString(16).padStart(4, '0'))
439
- .join('');
440
- }
441
-
442
/**
 * Helps to handle HTML whitespaces.
 *
 * Builds a set of regular expressions from the configured whitespace
 * characters and uses them to split text into words / newlines that are
 * then fed into an inline text builder.
 *
 * @class WhitespaceProcessor
 */
class WhitespaceProcessor {

  /**
   * Creates an instance of WhitespaceProcessor.
   *
   * Reads `options.whitespaceCharacters` and `options.preserveNewlines`.
   * The `shrinkWrapAdd` method is assigned here (not on the prototype)
   * because its implementation depends on `preserveNewlines`.
   *
   * @param { Options } options HtmlToText options.
   * @memberof WhitespaceProcessor
   */
  constructor (options) {
    // When newlines are preserved, `\n` must not be treated as collapsible whitespace.
    this.whitespaceChars = (options.preserveNewlines)
      ? options.whitespaceCharacters.replace(/\n/g, '')
      : options.whitespaceCharacters;
    // Escaped form of the whitespace set, embedded into character classes below.
    const whitespaceCodes = charactersToCodes(this.whitespaceChars);
    this.leadingWhitespaceRe = new RegExp(`^[${whitespaceCodes}]`);
    this.trailingWhitespaceRe = new RegExp(`[${whitespaceCodes}]$`);
    this.allWhitespaceOrEmptyRe = new RegExp(`^[${whitespaceCodes}]*$`);
    // The two regexes below are global (stateful `lastIndex`) and shared between calls.
    this.newlineOrNonWhitespaceRe = new RegExp(`(\\n|[^\\n${whitespaceCodes}])`, 'g');
    this.newlineOrNonNewlineStringRe = new RegExp(`(\\n|[^\\n]+)`, 'g');

    if (options.preserveNewlines) {

      // Matches either a single newline or a run of non-whitespace, non-newline characters.
      const wordOrNewlineRe = new RegExp(`\\n|[^\\n${whitespaceCodes}]+`, 'gm');

      /**
       * Shrink whitespaces and wrap text, add to the builder.
       *
       * Newlines in the input start new lines in the builder.
       *
       * @param { string } text Input text.
       * @param { InlineTextBuilder } inlineTextBuilder A builder to receive processed text.
       * @param { (str: string) => string } [ transform ] A transform to be applied to words.
       * @param { boolean } [noWrap] Don't wrap text even if the line is too long.
       */
      this.shrinkWrapAdd = function (text, inlineTextBuilder, transform = (str => str), noWrap = false) {
        if (!text) { return; }
        const previouslyStashedSpace = inlineTextBuilder.stashedSpace;
        let anyMatch = false;
        // The regex is global: exec is repeated until it returns null,
        // at which point its lastIndex is back at 0 for the next call.
        let m = wordOrNewlineRe.exec(text);
        if (m) {
          anyMatch = true;
          if (m[0] === '\n') {
            inlineTextBuilder.startNewLine();
          } else if (previouslyStashedSpace || this.testLeadingWhitespace(text)) {
            // There is whitespace before the first word - add it as a separate word.
            inlineTextBuilder.pushWord(transform(m[0]), noWrap);
          } else {
            // No whitespace before the first word - glue it to the previous one.
            inlineTextBuilder.concatWord(transform(m[0]), noWrap);
          }
          while ((m = wordOrNewlineRe.exec(text)) !== null) {
            if (m[0] === '\n') {
              inlineTextBuilder.startNewLine();
            } else {
              inlineTextBuilder.pushWord(transform(m[0]), noWrap);
            }
          }
        }
        // Keep an earlier stashed space if nothing was added; otherwise stash when the text ends with whitespace.
        inlineTextBuilder.stashedSpace = (previouslyStashedSpace && !anyMatch) || (this.testTrailingWhitespace(text));
        // No need to stash a space in case last added item was a new line,
        // but that won't affect anything later anyway.
      };

    } else {

      // Matches a run of non-whitespace characters (newline handling depends on the configured set).
      const wordRe = new RegExp(`[^${whitespaceCodes}]+`, 'g');

      /**
       * Shrink whitespaces and wrap text, add to the builder.
       *
       * @param { string } text Input text.
       * @param { InlineTextBuilder } inlineTextBuilder A builder to receive processed text.
       * @param { (str: string) => string } [ transform ] A transform to be applied to words.
       * @param { boolean } [noWrap] Don't wrap text even if the line is too long.
       */
      this.shrinkWrapAdd = function (text, inlineTextBuilder, transform = (str => str), noWrap = false) {
        if (!text) { return; }
        const previouslyStashedSpace = inlineTextBuilder.stashedSpace;
        let anyMatch = false;
        // Global regex: the loop below runs exec until null, which resets lastIndex.
        let m = wordRe.exec(text);
        if (m) {
          anyMatch = true;
          if (previouslyStashedSpace || this.testLeadingWhitespace(text)) {
            inlineTextBuilder.pushWord(transform(m[0]), noWrap);
          } else {
            inlineTextBuilder.concatWord(transform(m[0]), noWrap);
          }
          while ((m = wordRe.exec(text)) !== null) {
            inlineTextBuilder.pushWord(transform(m[0]), noWrap);
          }
        }
        inlineTextBuilder.stashedSpace = (previouslyStashedSpace && !anyMatch) || this.testTrailingWhitespace(text);
      };

    }
  }

  /**
   * Add text with only minimal processing.
   * Everything between newlines considered a single word.
   * No whitespace is trimmed.
   * Not affected by preserveNewlines option - `\n` always starts a new line.
   *
   * `noWrap` argument is `true` by default - this won't start a new line
   * even if there is not enough space left in the current line.
   *
   * @param { string } text Input text.
   * @param { InlineTextBuilder } inlineTextBuilder A builder to receive processed text.
   * @param { boolean } [noWrap] Don't wrap text even if the line is too long.
   */
  addLiteral (text, inlineTextBuilder, noWrap = true) {
    if (!text) { return; }
    const previouslyStashedSpace = inlineTextBuilder.stashedSpace;
    let anyMatch = false;
    // Shared global regex: exec is repeated until null so lastIndex ends at 0.
    let m = this.newlineOrNonNewlineStringRe.exec(text);
    if (m) {
      anyMatch = true;
      if (m[0] === '\n') {
        inlineTextBuilder.startNewLine();
      } else if (previouslyStashedSpace) {
        inlineTextBuilder.pushWord(m[0], noWrap);
      } else {
        inlineTextBuilder.concatWord(m[0], noWrap);
      }
      while ((m = this.newlineOrNonNewlineStringRe.exec(text)) !== null) {
        if (m[0] === '\n') {
          inlineTextBuilder.startNewLine();
        } else {
          inlineTextBuilder.pushWord(m[0], noWrap);
        }
      }
    }
    // A literal never stashes a new space; an old one survives only if nothing was added.
    inlineTextBuilder.stashedSpace = (previouslyStashedSpace && !anyMatch);
  }

  /**
   * Test whether the given text starts with HTML whitespace character.
   *
   * @param { string } text The string to test.
   * @returns { boolean }
   */
  testLeadingWhitespace (text) {
    return this.leadingWhitespaceRe.test(text);
  }

  /**
   * Test whether the given text ends with HTML whitespace character.
   *
   * @param { string } text The string to test.
   * @returns { boolean }
   */
  testTrailingWhitespace (text) {
    return this.trailingWhitespaceRe.test(text);
  }

  /**
   * Test whether the given text contains any non-whitespace characters.
   *
   * @param { string } text The string to test.
   * @returns { boolean }
   */
  testContainsWords (text) {
    return !this.allWhitespaceOrEmptyRe.test(text);
  }

  /**
   * Return the number of newlines if there are no words.
   *
   * If any word is found then return zero regardless of the actual number of newlines.
   *
   * @param { string } text Input string.
   * @returns { number }
   */
  countNewlinesNoWords (text) {
    // Reset explicitly: the early `return 0` below can leave lastIndex mid-string.
    this.newlineOrNonWhitespaceRe.lastIndex = 0;
    let counter = 0;
    let match;
    while ((match = this.newlineOrNonWhitespaceRe.exec(text)) !== null) {
      if (match[0] === '\n') {
        counter++;
      } else {
        return 0;
      }
    }
    return counter;
  }

}
622
-
623
/**
 * Helps to build text from inline and block elements.
 *
 * Maintains a stack of items (blocks, lists, list items, tables, rows, cells)
 * in `_stackItem` and a separate stack of word transformers in `_wordTransformer`.
 *
 * @class BlockTextBuilder
 */
class BlockTextBuilder {

  /**
   * Creates an instance of BlockTextBuilder.
   *
   * @param { Options } options HtmlToText options.
   * @param { import('selderee').Picker<DomNode, TagDefinition> } picker Selectors decision tree picker.
   * @param { any} [metadata] Optional metadata for HTML document, for use in formatters.
   */
  constructor (options, picker, metadata = undefined) {
    this.options = options;
    this.picker = picker;
    this.metadata = metadata;
    this.whitespaceProcessor = new WhitespaceProcessor(options);
    /** @type { StackItem } */
    this._stackItem = new BlockStackItem(options);
    /** @type { TransformerStackItem } */
    this._wordTransformer = undefined;
  }

  /**
   * Put a word-by-word transform function onto the transformations stack.
   *
   * Mainly used for uppercasing. Can be bypassed to add unformatted text such as URLs.
   *
   * Word transformations applied before wrapping.
   *
   * @param { (str: string) => string } wordTransform Word transformation function.
   */
  pushWordTransform (wordTransform) {
    this._wordTransformer = new TransformerStackItem(this._wordTransformer, wordTransform);
  }

  /**
   * Remove a function from the word transformations stack.
   *
   * @returns { (str: string) => string } A function that was removed
   * (`undefined` when the stack is empty).
   */
  popWordTransform () {
    if (!this._wordTransformer) { return undefined; }
    const transform = this._wordTransformer.transform;
    this._wordTransformer = this._wordTransformer.next;
    return transform;
  }

  /**
   * Ignore wordwrap option in followup inline additions and disable automatic wrapping.
   */
  startNoWrap () {
    this._stackItem.isNoWrap = true;
  }

  /**
   * Return automatic wrapping to behavior defined by options.
   */
  stopNoWrap () {
    this._stackItem.isNoWrap = false;
  }

  /**
   * Combine the word transformers stack with `options.encodeCharacters`.
   * Word transformers run first, character encoding runs on their result.
   * Returns `undefined` when neither is set.
   *
   * @returns { (str: string) => string }
   */
  _getCombinedWordTransformer () {
    const wt = (this._wordTransformer)
      ? ((str) => applyTransformer(str, this._wordTransformer))
      : undefined;
    const ce = this.options.encodeCharacters;
    return (wt)
      ? ((ce) ? (str) => ce(wt(str)) : wt)
      : ce;
  }

  /**
   * Remove the top item from the stack and return it.
   */
  _popStackItem () {
    const item = this._stackItem;
    this._stackItem = item.next;
    return item;
  }

  /**
   * Add a line break into currently built block.
   */
  addLineBreak () {
    // Only items that hold text can receive a line break; ignore otherwise.
    if (!(
      this._stackItem instanceof BlockStackItem
      || this._stackItem instanceof ListItemStackItem
      || this._stackItem instanceof TableCellStackItem
    )) { return; }
    if (this._stackItem.isPre) {
      this._stackItem.rawText += '\n';
    } else {
      this._stackItem.inlineTextBuilder.startNewLine();
    }
  }

  /**
   * Allow to break line in case directly following text will not fit.
   */
  addWordBreakOpportunity () {
    if (
      this._stackItem instanceof BlockStackItem
      || this._stackItem instanceof ListItemStackItem
      || this._stackItem instanceof TableCellStackItem
    ) {
      this._stackItem.inlineTextBuilder.wordBreakOpportunity = true;
    }
  }

  /**
   * Add a node inline into the currently built block.
   *
   * @param { string } str
   * Text content of a node to add.
   *
   * @param { object } [param1]
   * Object holding the parameters of the operation.
   *
   * @param { boolean } [param1.noWordTransform]
   * Ignore word transformers if there are any.
   * Don't encode characters as well.
   * (Use this for things like URL addresses).
   */
  addInline (str, { noWordTransform = false } = {}) {
    if (!(
      this._stackItem instanceof BlockStackItem
      || this._stackItem instanceof ListItemStackItem
      || this._stackItem instanceof TableCellStackItem
    )) { return; }

    // Preformatted text is appended verbatim, bypassing whitespace processing.
    if (this._stackItem.isPre) {
      this._stackItem.rawText += str;
      return;
    }

    if (
      str.length === 0 || // empty string
      (
        this._stackItem.stashedLineBreaks && // stashed linebreaks make whitespace irrelevant
        !this.whitespaceProcessor.testContainsWords(str) // no words to add
      )
    ) { return; }

    if (this.options.preserveNewlines) {
      const newlinesNumber = this.whitespaceProcessor.countNewlinesNoWords(str);
      if (newlinesNumber > 0) {
        this._stackItem.inlineTextBuilder.startNewLine(newlinesNumber);
        // keep stashedLineBreaks unchanged
        return;
      }
    }

    if (this._stackItem.stashedLineBreaks) {
      this._stackItem.inlineTextBuilder.startNewLine(this._stackItem.stashedLineBreaks);
    }
    this.whitespaceProcessor.shrinkWrapAdd(
      str,
      this._stackItem.inlineTextBuilder,
      // Passing undefined makes shrinkWrapAdd fall back to its default (identity) transform.
      (noWordTransform) ? undefined : this._getCombinedWordTransformer(),
      this._stackItem.isNoWrap
    );
    this._stackItem.stashedLineBreaks = 0; // inline text doesn't introduce line breaks
  }

  /**
   * Add a string inline into the currently built block.
   *
   * Use this for markup elements that don't have to adhere
   * to text layout rules.
   *
   * @param { string } str Text to add.
   */
  addLiteral (str) {
    if (!(
      this._stackItem instanceof BlockStackItem
      || this._stackItem instanceof ListItemStackItem
      || this._stackItem instanceof TableCellStackItem
    )) { return; }

    if (str.length === 0) { return; }

    if (this._stackItem.isPre) {
      this._stackItem.rawText += str;
      return;
    }

    if (this._stackItem.stashedLineBreaks) {
      this._stackItem.inlineTextBuilder.startNewLine(this._stackItem.stashedLineBreaks);
    }
    this.whitespaceProcessor.addLiteral(
      str,
      this._stackItem.inlineTextBuilder,
      this._stackItem.isNoWrap
    );
    this._stackItem.stashedLineBreaks = 0;
  }

  /**
   * Start building a new block.
   *
   * @param { object } [param0]
   * Object holding the parameters of the block.
   *
   * @param { number } [param0.leadingLineBreaks]
   * This block should have at least this number of line breaks to separate it from any preceding block.
   *
   * @param { number } [param0.reservedLineLength]
   * Reserve this number of characters on each line for block markup.
   *
   * @param { boolean } [param0.isPre]
   * Should HTML whitespace be preserved inside this block.
   */
  openBlock ({ leadingLineBreaks = 1, reservedLineLength = 0, isPre = false } = {}) {
    // The line length never drops below 20 characters, whatever is reserved.
    const maxLineLength = Math.max(20, this._stackItem.inlineTextBuilder.maxLineLength - reservedLineLength);
    this._stackItem = new BlockStackItem(
      this.options,
      this._stackItem,
      leadingLineBreaks,
      maxLineLength
    );
    // Only ever switches the flag on; the `isPre` argument never clears it.
    if (isPre) { this._stackItem.isPre = true; }
  }

  /**
   * Finalize currently built block, add its content to the parent block.
   *
   * @param { object } [param0]
   * Object holding the parameters of the block.
   *
   * @param { number } [param0.trailingLineBreaks]
   * This block should have at least this number of line breaks to separate it from any following block.
   *
   * @param { (str: string) => string } [param0.blockTransform]
   * A function to transform the block text before adding to the parent block.
   * This happens after word wrap and should be used in combination with reserved line length
   * in order to keep line lengths correct.
   * Used for whole block markup.
   */
  closeBlock ({ trailingLineBreaks = 1, blockTransform = undefined } = {}) {
    const block = this._popStackItem();
    const blockText = (blockTransform) ? blockTransform(getText(block)) : getText(block);
    addText(this._stackItem, blockText, block.leadingLineBreaks, Math.max(block.stashedLineBreaks, trailingLineBreaks));
  }

  /**
   * Start building a new list.
   *
   * @param { object } [param0]
   * Object holding the parameters of the list.
   *
   * @param { number } [param0.maxPrefixLength]
   * Length of the longest list item prefix.
   * If not supplied or too small then list items won't be aligned properly.
   *
   * @param { 'left' | 'right' } [param0.prefixAlign]
   * Specify how prefixes of different lengths have to be aligned
   * within a column.
   *
   * @param { number } [param0.interRowLineBreaks]
   * Minimum number of line breaks between list items.
   *
   * @param { number } [param0.leadingLineBreaks]
   * This list should have at least this number of line breaks to separate it from any preceding block.
   */
  openList ({ maxPrefixLength = 0, prefixAlign = 'left', interRowLineBreaks = 1, leadingLineBreaks = 2 } = {}) {
    this._stackItem = new ListStackItem(this.options, this._stackItem, {
      interRowLineBreaks: interRowLineBreaks,
      leadingLineBreaks: leadingLineBreaks,
      maxLineLength: this._stackItem.inlineTextBuilder.maxLineLength,
      maxPrefixLength: maxPrefixLength,
      prefixAlign: prefixAlign
    });
  }

  /**
   * Start building a new list item.
   *
   * @param {object} param0
   * Object holding the parameters of the list item.
   *
   * @param { string } [param0.prefix]
   * Prefix for this list item (item number, bullet point, etc).
   *
   * @throws { Error } When the current stack item is not a list.
   */
  openListItem ({ prefix = '' } = {}) {
    if (!(this._stackItem instanceof ListStackItem)) {
      throw new Error('Can\'t add a list item to something that is not a list! Check the formatter.');
    }
    const list = this._stackItem;
    const prefixLength = Math.max(prefix.length, list.maxPrefixLength);
    // Item text is wrapped to the list width minus the prefix column (at least 20 characters).
    const maxLineLength = Math.max(20, list.inlineTextBuilder.maxLineLength - prefixLength);
    this._stackItem = new ListItemStackItem(this.options, list, {
      prefix: prefix,
      maxLineLength: maxLineLength,
      leadingLineBreaks: list.interRowLineBreaks
    });
  }

  /**
   * Finalize currently built list item, add its content to the parent list.
   */
  closeListItem () {
    const listItem = this._popStackItem();
    const list = listItem.next;

    const prefixLength = Math.max(listItem.prefix.length, list.maxPrefixLength);
    // Continuation lines are indented by the prefix width.
    const spacing = '\n' + ' '.repeat(prefixLength);
    const prefix = (list.prefixAlign === 'right')
      ? listItem.prefix.padStart(prefixLength)
      : listItem.prefix.padEnd(prefixLength);
    const text = prefix + getText(listItem).replace(/\n/g, spacing);

    addText(
      list,
      text,
      listItem.leadingLineBreaks,
      Math.max(listItem.stashedLineBreaks, list.interRowLineBreaks)
    );
  }

  /**
   * Finalize currently built list, add its content to the parent block.
   * Nothing is added when the list text is empty.
   *
   * @param { object } param0
   * Object holding the parameters of the list.
   *
   * @param { number } [param0.trailingLineBreaks]
   * This list should have at least this number of line breaks to separate it from any following block.
   */
  closeList ({ trailingLineBreaks = 2 } = {}) {
    const list = this._popStackItem();
    const text = getText(list);
    if (text) {
      addText(this._stackItem, text, list.leadingLineBreaks, trailingLineBreaks);
    }
  }

  /**
   * Start building a table.
   */
  openTable () {
    this._stackItem = new TableStackItem(this._stackItem);
  }

  /**
   * Start building a table row.
   *
   * @throws { Error } When the current stack item is not a table.
   */
  openTableRow () {
    if (!(this._stackItem instanceof TableStackItem)) {
      throw new Error('Can\'t add a table row to something that is not a table! Check the formatter.');
    }
    this._stackItem = new TableRowStackItem(this._stackItem);
  }

  /**
   * Start building a table cell.
   *
   * @param { object } [param0]
   * Object holding the parameters of the cell.
   *
   * @param { number } [param0.maxColumnWidth]
   * Wrap cell content to this width. Fall back to global wordwrap value if undefined.
   *
   * @throws { Error } When the current stack item is not a table row.
   */
  openTableCell ({ maxColumnWidth = undefined } = {}) {
    if (!(this._stackItem instanceof TableRowStackItem)) {
      throw new Error('Can\'t add a table cell to something that is not a table row! Check the formatter.');
    }
    this._stackItem = new TableCellStackItem(this.options, this._stackItem, maxColumnWidth);
  }

  /**
   * Finalize currently built table cell and add it to parent table row's cells.
   *
   * @param { object } [param0]
   * Object holding the parameters of the cell.
   *
   * @param { number } [param0.colspan] How many columns this cell should occupy.
   * @param { number } [param0.rowspan] How many rows this cell should occupy.
   */
  closeTableCell ({ colspan = 1, rowspan = 1 } = {}) {
    const cell = this._popStackItem();
    const text = trimCharacter(getText(cell), '\n');
    cell.next.cells.push({ colspan: colspan, rowspan: rowspan, text: text });
  }

  /**
   * Finalize currently built table row and add it to parent table's rows.
   */
  closeTableRow () {
    const row = this._popStackItem();
    row.next.rows.push(row.cells);
  }

  /**
   * Finalize currently built table and add the rendered text to the parent block.
   * Nothing is added when the printer returns an empty result.
   *
   * @param { object } param0
   * Object holding the parameters of the table.
   *
   * @param { TablePrinter } param0.tableToString
   * A function to convert a table of stringified cells into a complete table.
   *
   * @param { number } [param0.leadingLineBreaks]
   * This table should have at least this number of line breaks to separate it from any preceding block.
   *
   * @param { number } [param0.trailingLineBreaks]
   * This table should have at least this number of line breaks to separate it from any following block.
   */
  closeTable ({ tableToString, leadingLineBreaks = 2, trailingLineBreaks = 2 }) {
    const table = this._popStackItem();
    const output = tableToString(table.rows);
    if (output) {
      addText(this._stackItem, output, leadingLineBreaks, trailingLineBreaks);
    }
  }

  /**
   * Return the rendered text content of this builder.
   *
   * @returns { string }
   */
  toString () {
    return getText(this._stackItem.getRoot());
    // There should only be the root item if everything is closed properly.
  }

}
1050
-
1051
/**
 * Get the text accumulated so far in a text-holding stack item:
 * its raw text followed by whatever the inline text builder currently holds.
 *
 * @param { StackItem } stackItem A block, list item or table cell.
 * @returns { string }
 * @throws { Error } When the item is of any other kind.
 */
function getText (stackItem) {
  const holdsText = stackItem instanceof BlockStackItem
    || stackItem instanceof ListItemStackItem
    || stackItem instanceof TableCellStackItem;
  if (!holdsText) {
    throw new Error('Only blocks, list items and table cells can be requested for text contents.');
  }
  if (stackItem.inlineTextBuilder.isEmpty()) {
    return stackItem.rawText;
  }
  return stackItem.rawText + stackItem.inlineTextBuilder.toString();
}
1063
-
1064
/**
 * Append finished text to a text-holding stack item, separated from the
 * existing content by line breaks.
 *
 * The inline builder content is folded into `rawText` and the builder is cleared.
 * If the item had no text yet, no separator is inserted and the computed
 * number of line breaks becomes the item's own `leadingLineBreaks`.
 *
 * @param { StackItem } stackItem A block, list item or table cell to add to.
 * @param { string } text Text to append.
 * @param { number } leadingLineBreaks Minimum line breaks before the text.
 * @param { number } trailingLineBreaks Line breaks to stash after the text.
 * @throws { Error } When the item is of any other kind.
 */
function addText (stackItem, text, leadingLineBreaks, trailingLineBreaks) {
  const canHoldText = stackItem instanceof BlockStackItem
    || stackItem instanceof ListItemStackItem
    || stackItem instanceof TableCellStackItem;
  if (!canHoldText) {
    throw new Error('Only blocks, list items and table cells can contain text.');
  }

  const existingText = getText(stackItem);
  const separatorLength = Math.max(stackItem.stashedLineBreaks, leadingLineBreaks);
  stackItem.inlineTextBuilder.clear();

  if (!existingText) {
    stackItem.rawText = text;
    stackItem.leadingLineBreaks = separatorLength;
  } else {
    stackItem.rawText = existingText + '\n'.repeat(separatorLength) + text;
  }
  stackItem.stashedLineBreaks = trailingLineBreaks;
}
1083
-
1084
/**
 * Run a string through a chain of transformer items.
 *
 * The given item's transform is applied first, then each item reachable
 * through `next`, feeding every result into the following transform.
 *
 * @param { string } str A string to transform.
 * @param { TransformerStackItem } transformer A transformer item (with possible continuation).
 * @returns { string }
 */
function applyTransformer (str, transformer) {
  let result = str;
  for (let item = transformer; item; item = item.next) {
    result = item.transform(result);
  }
  return result;
}
1092
-
1093
/**
 * Compile selectors into a decision tree,
 * return a function intended for batch processing.
 *
 * Note: when `options.encodeCharacters` is not a function it is replaced
 * on the given options object with a replacer built from it.
 *
 * @param { Options } [options = {}] HtmlToText options (defaults, formatters, user options merged, deduplicated).
 * @returns { (html: string, metadata?: any) => string } Pre-configured converter function.
 * @throws { Error } When some selectors have no format specified.
 * @static
 */
function compile$1 (options = {}) {
  const formatless = options.selectors.filter(definition => !definition.format);
  if (formatless.length) {
    const names = formatless.map(definition => `\`${definition.selector}\``).join(', ');
    throw new Error('Following selectors have no specified format: ' + names);
  }

  const selectorPairs = options.selectors.map(definition => [definition.selector, definition]);
  const picker = new DecisionTree(selectorPairs).build(hp2Builder);

  if (typeof options.encodeCharacters !== 'function') {
    options.encodeCharacters = makeReplacerFromDict(options.encodeCharacters);
  }

  // Base selectors are numbered from 1 so that the picked index is always truthy.
  const basePairs = options.baseElements.selectors.map((selector, index) => [selector, index + 1]);
  const baseSelectorsPicker = new DecisionTree(basePairs).build(hp2Builder);
  const findBaseElements = (dom) => findBases(dom, options, baseSelectorsPicker);

  // Third argument is the handler passed alongside the depth limit; it adds the ellipsis inline.
  const addEllipsis = (dom, builder) => {
    builder.addInline(options.limits.ellipsis || '');
  };
  const limitedWalk = limitedDepthRecursive(options.limits.maxDepth, recursiveWalk, addEllipsis);

  return (html, metadata = undefined) => process(
    html,
    metadata,
    options,
    picker,
    findBaseElements,
    limitedWalk
  );
}
1136
-
1137
-
1138
/**
 * Convert given HTML according to preprocessed options.
 *
 * Input longer than `options.limits.maxInputLength` is truncated
 * (with a console warning) before parsing.
 *
 * @param { string } html HTML content to convert.
 * @param { any } metadata Optional metadata for HTML document, for use in formatters.
 * @param { Options } options HtmlToText options (preprocessed).
 * @param { import('selderee').Picker<DomNode, TagDefinition> } picker
 * Tag definition picker for DOM nodes processing.
 * @param { (dom: DomNode[]) => DomNode[] } findBaseElements
 * Function to extract elements from HTML DOM
 * that will only be present in the output text.
 * @param { RecursiveCallback } walk Recursive callback.
 * @returns { string }
 */
function process (html, metadata, options, picker, findBaseElements, walk) {
  const { maxInputLength } = options.limits;
  let input = html;
  const exceedsLimit = maxInputLength && input && input.length > maxInputLength;
  if (exceedsLimit) {
    console.warn(
      `Input length ${input.length} is above allowed limit of ${maxInputLength}. Truncating without ellipsis.`
    );
    input = input.substring(0, maxInputLength);
  }

  const { decodeEntities } = options;
  const document = parseDocument(input, { decodeEntities: decodeEntities });
  const bases = findBaseElements(document.children);
  const builder = new BlockTextBuilder(options, picker, metadata);
  walk(bases, builder);
  return builder.toString();
}
1167
-
1168
-
1169
/**
 * Collect the elements matching the base selectors.
 *
 * Walks tag nodes (depth-limited); a matched element is recorded and not
 * descended into. Collection within a node list stops once
 * `options.limits.maxBaseElements` results exist.
 *
 * @param { DomNode[] } dom Nodes to search.
 * @param { Options } options HtmlToText options.
 * @param { object } baseSelectorsPicker Picker whose `pick1` yields a 1-based selector index for a matching element.
 * @returns { DomNode[] } Matched elements, ordered by selector unless
 * `baseElements.orderBy` is 'occurrence'; the original `dom` when nothing
 * matched and `baseElements.returnDomByDefault` is set.
 */
function findBases (dom, options, baseSelectorsPicker) {
  const found = [];

  function collectBases (walk, /** @type { DomNode[] } */ nodes) {
    const candidates = nodes.slice(0, options.limits.maxChildNodes);
    for (const node of candidates) {
      if (node.type === 'tag') {
        const selectorIndex = baseSelectorsPicker.pick1(node);
        if (selectorIndex > 0) {
          found.push({ selectorIndex: selectorIndex, element: node });
        } else if (node.children) {
          walk(node.children);
        }
        if (found.length >= options.limits.maxBaseElements) {
          return;
        }
      }
    }
  }

  const limitedWalk = limitedDepthRecursive(options.limits.maxDepth, collectBases);
  limitedWalk(dom);

  const orderBySelectors = options.baseElements.orderBy !== 'occurrence';
  if (orderBySelectors) {
    found.sort((left, right) => left.selectorIndex - right.selectorIndex);
  }

  if (options.baseElements.returnDomByDefault && found.length === 0) {
    return dom;
  }
  return found.map(entry => entry.element);
}
1203
-
1204
/**
 * Function to walk through DOM nodes and accumulate their string representations.
 *
 * Text nodes are added inline; tag nodes are dispatched to the formatter
 * named by the picked tag definition. Nodes of any other type are ignored.
 * When there are more nodes than `options.limits.maxChildNodes`, the extra
 * ones are dropped and replaced by a single text node holding the ellipsis.
 * The input array itself is never modified.
 *
 * @param { RecursiveCallback } walk Recursive callback.
 * @param { DomNode[] } [dom] Nodes array to process.
 * @param { BlockTextBuilder } builder Passed around to accumulate output text.
 * @private
 */
function recursiveWalk (walk, dom, builder) {
  if (!dom) { return; }

  const options = builder.options;
  const limits = options.limits;

  let nodes = dom;
  if (dom.length > limits.maxChildNodes) {
    nodes = dom.slice(0, limits.maxChildNodes);
    nodes.push({
      // Fall back to an empty string: the ellipsis may be unset, and a text
      // node without string data cannot be added inline.
      data: limits.ellipsis || '',
      type: 'text'
    });
  }

  for (const elem of nodes) {
    switch (elem.type) {
      case 'text': {
        builder.addInline(elem.data);
        break;
      }
      case 'tag': {
        const tagDefinition = builder.picker.pick1(elem);
        const format = options.formatters[tagDefinition.format];
        format(elem, walk, builder, tagDefinition.options || {});
        break;
      }
    }
  }
}
1243
-
1244
/**
 * Build a function that replaces characters according to a dictionary.
 *
 * @param { Object<string,string | false> } dict
 * A dictionary where keys are characters to replace
 * and values are replacement strings.
 * Entries with the value `false` are ignored.
 *
 * First code point from dict keys is used.
 * Compound emojis with ZWJ are not supported (not until Node 16).
 *
 * @returns { ((str: string) => string) | undefined }
 * A replacer function, or `undefined` when there is nothing to replace
 * (no dictionary, empty dictionary, or every entry disabled with `false`).
 */
function makeReplacerFromDict (dict) {
  if (!dict) {
    return undefined;
  }
  /** @type { [string, string][] } */
  const entries = Object.entries(dict).filter(([, v]) => v !== false);
  // Check after filtering: an empty alternation would compile to a regex that
  // matches the empty string everywhere and inject "undefined" into the text.
  if (entries.length === 0) {
    return undefined;
  }
  // One capture group per entry; the index of the matched group selects the value.
  const regex = new RegExp(
    entries
      .map(([c]) => `(${unicodeEscape([...c][0])})`)
      .join('|'),
    'g'
  );
  const values = entries.map(([, v]) => v);
  const replacer = (m, ...cgs) => values[cgs.findIndex(cg => cg)];
  return (str) => str.replace(regex, replacer);
}
1270
-
1271
/**
 * No-op formatter: the element and all of its content are left out of the output.
 *
 * @type { FormatCallback }
 */
function formatSkip (elem, walk, builder, formatOptions) {
  // Intentionally empty.
  return undefined;
}
1279
-
1280
/**
 * Replace a tag with the literal string from `formatOptions.string`
 * (empty string when not set), added inline.
 *
 * @type { FormatCallback }
 */
function formatInlineString (elem, walk, builder, formatOptions) {
  const literal = formatOptions.string || '';
  builder.addLiteral(literal);
}
1288
-
1289
/**
 * Replace a tag with a block that contains only the literal string
 * from `formatOptions.string` (empty string when not set).
 *
 * Leading and trailing line breaks fall back to 2 when the option is falsy.
 *
 * @type { FormatCallback }
 */
function formatBlockString (elem, walk, builder, formatOptions) {
  const leadingLineBreaks = formatOptions.leadingLineBreaks || 2;
  builder.openBlock({ leadingLineBreaks: leadingLineBreaks });

  const literal = formatOptions.string || '';
  builder.addLiteral(literal);

  const trailingLineBreaks = formatOptions.trailingLineBreaks || 2;
  builder.closeBlock({ trailingLineBreaks: trailingLineBreaks });
}
1299
-
1300
/**
 * Process an inline-level element: its children are walked in place,
 * the element itself adds nothing.
 *
 * @type { FormatCallback }
 */
function formatInline (elem, walk, builder, formatOptions) {
  const { children } = elem;
  walk(children, builder);
}
1308
-
1309
/**
 * Process a block-level container: open a block, walk the children, close the block.
 *
 * Leading and trailing line breaks fall back to 2 when the option is falsy.
 *
 * @type { FormatCallback }
 */
function formatBlock$1 (elem, walk, builder, formatOptions) {
  const leadingLineBreaks = formatOptions.leadingLineBreaks || 2;
  builder.openBlock({ leadingLineBreaks: leadingLineBreaks });

  walk(elem.children, builder);

  const trailingLineBreaks = formatOptions.trailingLineBreaks || 2;
  builder.closeBlock({ trailingLineBreaks: trailingLineBreaks });
}
1319
-
1320
/**
 * Render the opening HTML tag of an element, including its attributes.
 *
 * Attributes with an empty value are rendered as bare names; other values
 * are wrapped in double quotes with inner double quotes escaped as `&quot;`.
 *
 * @param { { name: string, attribs?: Object<string, string> } } elem Element to render.
 * @returns { string }
 */
function renderOpenTag (elem) {
  // `attribs` is a plain name-to-value object, so it has no `length`;
  // enumerate its entries instead (a missing object yields no attributes).
  const attrs = Object.entries(elem.attribs || {})
    .map(([k, v]) => ((v === '') ? ` ${k}` : ` ${k}="${String(v).replace(/"/g, '&quot;')}"`))
    .join('');
  return `<${elem.name}${attrs}>`;
}
1328
-
1329
/**
 * Render the closing HTML tag of an element.
 *
 * @param { { name: string } } elem Element to render.
 * @returns { string }
 */
function renderCloseTag (elem) {
  const { name } = elem;
  return '</' + name + '>';
}
1332
-
1333
/**
 * Render an element as inline HTML tag, walk through its children.
 *
 * The opening and closing tags are added as literals with wrapping
 * switched off; the children are processed with normal wrapping in between.
 *
 * @type { FormatCallback }
 */
function formatInlineTag (elem, walk, builder, formatOptions) {
  // Tag markup is rendered only after no-wrap mode has been entered.
  const addTagWithoutWrapping = (renderTag) => {
    builder.startNoWrap();
    builder.addLiteral(renderTag(elem));
    builder.stopNoWrap();
  };

  addTagWithoutWrapping(renderOpenTag);
  walk(elem.children, builder);
  addTagWithoutWrapping(renderCloseTag);
}
1347
-
1348
/**
 * Render an element as HTML block tag, walk through its children.
 *
 * Same as the inline tag rendering, but wrapped into its own block.
 * Leading and trailing line breaks fall back to 2 when the option is falsy.
 *
 * @type { FormatCallback }
 */
function formatBlockTag (elem, walk, builder, formatOptions) {
  // Tag markup is rendered only after no-wrap mode has been entered.
  const addTagWithoutWrapping = (renderTag) => {
    builder.startNoWrap();
    builder.addLiteral(renderTag(elem));
    builder.stopNoWrap();
  };

  const leadingLineBreaks = formatOptions.leadingLineBreaks || 2;
  builder.openBlock({ leadingLineBreaks: leadingLineBreaks });

  addTagWithoutWrapping(renderOpenTag);
  walk(elem.children, builder);
  addTagWithoutWrapping(renderCloseTag);

  const trailingLineBreaks = formatOptions.trailingLineBreaks || 2;
  builder.closeBlock({ trailingLineBreaks: trailingLineBreaks });
}
1364
-
1365
/**
 * Render an element with all its children as inline HTML.
 *
 * The serialized markup is added as a single literal with wrapping switched off.
 *
 * @type { FormatCallback }
 */
function formatInlineHtml (elem, walk, builder, formatOptions) {
  const { decodeEntities } = builder.options;
  builder.startNoWrap();
  const html = render(elem, { decodeEntities: decodeEntities });
  builder.addLiteral(html);
  builder.stopNoWrap();
}
1377
-
1378
/**
 * Render an element with all its children as HTML block.
 *
 * The serialized markup is added as a single literal, with wrapping
 * switched off, inside its own block.
 * Leading and trailing line breaks fall back to 2 when the option is falsy.
 *
 * @type { FormatCallback }
 */
function formatBlockHtml (elem, walk, builder, formatOptions) {
  const leadingLineBreaks = formatOptions.leadingLineBreaks || 2;
  builder.openBlock({ leadingLineBreaks: leadingLineBreaks });

  builder.startNoWrap();
  const html = render(elem, { decodeEntities: builder.options.decodeEntities });
  builder.addLiteral(html);
  builder.stopNoWrap();

  const trailingLineBreaks = formatOptions.trailingLineBreaks || 2;
  builder.closeBlock({ trailingLineBreaks: trailingLineBreaks });
}
1392
-
1393
/**
 * Render an inline element wrapped with the given strings.
 *
 * `formatOptions.prefix` is emitted before the children and
 * `formatOptions.suffix` after them; a missing (falsy) value is
 * emitted as an empty literal.
 *
 * @type { FormatCallback }
 */
function formatInlineSurround (elem, walk, builder, formatOptions) {
  const orEmpty = (value) => (value ? value : '');

  builder.addLiteral(orEmpty(formatOptions.prefix));
  walk(elem.children, builder);
  builder.addLiteral(orEmpty(formatOptions.suffix));
}
1403
-
1404
// Frozen, prototype-less lookup table of the generic (markup-agnostic)
// formatters, keyed by the format name used in selector definitions.
var genericFormatters = /*#__PURE__*/Object.freeze(
  /*#__PURE__*/Object.assign(/*#__PURE__*/Object.create(null), {
    block: formatBlock$1,
    blockHtml: formatBlockHtml,
    blockString: formatBlockString,
    blockTag: formatBlockTag,
    inline: formatInline,
    inlineHtml: formatInlineHtml,
    inlineString: formatInlineString,
    inlineSurround: formatInlineSurround,
    inlineTag: formatInlineTag,
    skip: formatSkip
  })
);
1417
-
1418
/**
 * Return row `j` of a matrix, creating an empty row in place when the
 * slot is missing (or holds a falsy value).
 *
 * @param { any[][] } matrix Matrix to read from; may be modified.
 * @param { number } j Row index.
 * @returns { any[] } The existing or newly created row.
 */
function getRow (matrix, j) {
  const existing = matrix[j];
  if (existing) { return existing; }

  const created = [];
  matrix[j] = created;
  return created;
}
1422
-
1423
/**
 * Find the first index at or after `x` whose slot in `row` is falsy.
 *
 * @param { any[] } row Row to scan.
 * @param { number } [x=0] Index to start scanning from.
 * @returns { number } Index of the first vacant slot.
 */
function findFirstVacantIndex (row, x = 0) {
  let index = x;
  for (; row[index]; index++) {
    // occupied slot, keep scanning
  }
  return index;
}
1427
-
1428
/**
 * Transpose the top-left `maxSize` x `maxSize` part of a matrix in place.
 *
 * Rows are created on demand through `getRow`. A pair of mirrored slots
 * is swapped only when at least one of them holds a truthy value.
 *
 * @param { any[][] } matrix Matrix to transpose; modified in place.
 * @param { number } maxSize Number of rows and columns to process.
 */
function transposeInPlace (matrix, maxSize) {
  for (let rowIndex = 0; rowIndex < maxSize; rowIndex++) {
    const currentRow = getRow(matrix, rowIndex);
    for (let colIndex = 0; colIndex < rowIndex; colIndex++) {
      const mirrorRow = getRow(matrix, colIndex);
      if (!currentRow[colIndex] && !mirrorRow[rowIndex]) { continue; }
      [currentRow[colIndex], mirrorRow[rowIndex]] = [mirrorRow[rowIndex], currentRow[colIndex]];
    }
  }
}
1441
-
1442
/**
 * Mark every layout slot covered by a cell with a reference to that cell.
 *
 * The covered area starts at (`baseRow`, `baseCol`) and spans
 * `cell.rowspan` rows and `cell.colspan` columns.
 *
 * @param { TablePrinterCell } cell Cell to place.
 * @param { TablePrinterCell[][] } layout Layout matrix; modified in place.
 * @param { number } baseRow Row index of the top-left corner.
 * @param { number } baseCol Column index of the top-left corner.
 */
function putCellIntoLayout (cell, layout, baseRow, baseCol) {
  for (let rowOffset = 0; rowOffset < cell.rowspan; rowOffset++) {
    const targetRow = getRow(layout, baseRow + rowOffset);
    let colOffset = 0;
    while (colOffset < cell.colspan) {
      targetRow[baseCol + colOffset] = cell;
      colOffset++;
    }
  }
}
1450
-
1451
/**
 * Return the offset stored at `index`, initializing it first if needed.
 *
 * Missing offsets are filled in so that each one is 1 greater than its
 * predecessor, starting from the closest defined offset below `index`
 * (or from 0 at index 0 when nothing is defined).
 *
 * Implemented iteratively: the gap to the closest defined offset can be
 * as large as a cell's span, and a recursive walk over it could exhaust
 * the call stack.
 *
 * @param { number[] } offsets Offsets array; modified in place.
 * @param { number } index Index of the requested offset.
 * @returns { number } The offset at `index`.
 */
function getOrInitOffset (offsets, index) {
  // Walk down to the closest defined offset (or to index 0).
  let known = index;
  while (known > 0 && offsets[known] === undefined) { known--; }
  if (offsets[known] === undefined) { offsets[known] = 0; }

  // Fill the gap back up to the requested index.
  for (let i = known + 1; i <= index; i++) {
    offsets[i] = 1 + offsets[i - 1];
  }
  return offsets[index];
}
1457
-
1458
/**
 * Raise the offset at `base + span` so that it is at least
 * `value` past the offset at `base`. It is never lowered.
 *
 * @param { number[] } offsets Offsets array; modified in place.
 * @param { number } base Index of the starting offset.
 * @param { number } span Distance from `base` to the offset being updated.
 * @param { number } value Minimum distance required between the two offsets.
 */
function updateOffset (offsets, base, span, value) {
  const endIndex = base + span;
  const currentEnd = getOrInitOffset(offsets, endIndex);
  const requiredEnd = getOrInitOffset(offsets, base) + value;
  offsets[endIndex] = Math.max(currentEnd, requiredEnd);
}
1464
-
1465
/**
 * Render a table into a string.
 * Cells can contain multiline text and span across multiple rows and columns.
 *
 * Modifies cells: adds a `lines` array to each cell and sets `rendered`
 * on every cell that has been written to the output.
 *
 * @param { TablePrinterCell[][] } tableRows Table to render.
 * @param { number } rowSpacing Number of empty lines between rows
 * (added to each cell's height when computing row offsets).
 * @param { number } colSpacing Number of spaces between columns
 * (added to each cell's width when computing column offsets).
 * @returns { string }
 */
function tableToString (tableRows, rowSpacing, colSpacing) {
  const layout = [];
  let colNumber = 0;
  const rowNumber = tableRows.length;
  const rowOffsets = [0];
  // Fill the layout table and row offsets row-by-row.
  for (let j = 0; j < rowNumber; j++) {
    const layoutRow = getRow(layout, j);
    const cells = tableRows[j];
    let x = 0;
    for (let i = 0; i < cells.length; i++) {
      const cell = cells[i];
      // Skip slots already taken by cells spanning down from previous rows.
      x = findFirstVacantIndex(layoutRow, x);
      putCellIntoLayout(cell, layout, j, x);
      x += cell.colspan;
      cell.lines = cell.text.split('\n');
      const cellHeight = cell.lines.length;
      updateOffset(rowOffsets, j, cell.rowspan, cellHeight + rowSpacing);
    }
    colNumber = Math.max(colNumber, layoutRow.length);
  }

  // After this, layout[x][y] addresses column x, row y.
  transposeInPlace(layout, Math.max(rowNumber, colNumber));

  const outputLines = [];
  const colOffsets = [0];
  // Fill column offsets and output lines column-by-column.
  for (let x = 0; x < colNumber; x++) {
    let y = 0;
    let cell;
    const rowsInThisColumn = Math.min(rowNumber, layout[x].length);
    while (y < rowsInThisColumn) {
      cell = layout[x][y];
      if (cell) {
        // A spanning cell occupies several slots but is written only once.
        if (!cell.rendered) {
          let cellWidth = 0;
          for (let j = 0; j < cell.lines.length; j++) {
            const line = cell.lines[j];
            const lineOffset = rowOffsets[y] + j;
            outputLines[lineOffset] = (outputLines[lineOffset] || '').padEnd(colOffsets[x]) + line;
            cellWidth = Math.max(cellWidth, line.length);
          }
          updateOffset(colOffsets, x, cell.colspan, cellWidth + colSpacing);
          cell.rendered = true;
        }
        y += cell.rowspan;
      } else {
        // Empty slot: make sure the output line exists so it is not lost on join.
        const lineOffset = rowOffsets[y];
        outputLines[lineOffset] = (outputLines[lineOffset] || '');
        y++;
      }
    }
  }

  return outputLines.join('\n');
}
1532
-
1533
/**
 * Process a line-break: adds a single line break to the builder.
 * The element itself, its children and the format options are not used.
 *
 * @type { FormatCallback }
 */
function formatLineBreak (elem, walk, builder, formatOptions) {
  builder.addLineBreak();
}
1541
-
1542
/**
 * Process a `wbr` tag: registers a word break opportunity with the builder.
 * The element itself, its children and the format options are not used.
 *
 * @type { FormatCallback }
 */
function formatWbr (elem, walk, builder, formatOptions) {
  builder.addWordBreakOpportunity();
}
1550
-
1551
/**
 * Process a horizontal line: a block made of `-` characters.
 *
 * The line length is `formatOptions.length`, falling back to
 * `builder.options.wordwrap` and then to 40 when the former are falsy.
 * `leadingLineBreaks` / `trailingLineBreaks` default to 2 only when not
 * provided (`null`/`undefined`); an explicit `0` is respected.
 *
 * @type { FormatCallback }
 */
function formatHorizontalLine (elem, walk, builder, formatOptions) {
  builder.openBlock({ leadingLineBreaks: formatOptions.leadingLineBreaks ?? 2 });
  // `||` is intentional here: a zero length or a disabled wordwrap must fall through.
  const lineLength = formatOptions.length || builder.options.wordwrap || 40;
  builder.addInline('-'.repeat(lineLength));
  builder.closeBlock({ trailingLineBreaks: formatOptions.trailingLineBreaks ?? 2 });
}
1561
-
1562
/**
 * Process a paragraph: a block containing the element's children.
 *
 * `leadingLineBreaks` / `trailingLineBreaks` default to 2 only when not
 * provided (`null`/`undefined`); an explicit `0` is respected.
 *
 * @type { FormatCallback }
 */
function formatParagraph (elem, walk, builder, formatOptions) {
  builder.openBlock({ leadingLineBreaks: formatOptions.leadingLineBreaks ?? 2 });
  walk(elem.children, builder);
  builder.closeBlock({ trailingLineBreaks: formatOptions.trailingLineBreaks ?? 2 });
}
1572
-
1573
/**
 * Process preformatted content: a block opened with `isPre: true`
 * containing the element's children.
 *
 * `leadingLineBreaks` / `trailingLineBreaks` default to 2 only when not
 * provided (`null`/`undefined`); an explicit `0` is respected.
 *
 * @type { FormatCallback }
 */
function formatPre (elem, walk, builder, formatOptions) {
  builder.openBlock({
    isPre: true,
    leadingLineBreaks: formatOptions.leadingLineBreaks ?? 2
  });
  walk(elem.children, builder);
  builder.closeBlock({ trailingLineBreaks: formatOptions.trailingLineBreaks ?? 2 });
}
1586
-
1587
/**
 * Process a heading: a block containing the element's children,
 * uppercased unless `formatOptions.uppercase` is exactly `false`.
 *
 * `leadingLineBreaks` / `trailingLineBreaks` default to 2 only when not
 * provided (`null`/`undefined`); an explicit `0` is respected.
 *
 * @type { FormatCallback }
 */
function formatHeading (elem, walk, builder, formatOptions) {
  const shouldUppercase = formatOptions.uppercase !== false;

  builder.openBlock({ leadingLineBreaks: formatOptions.leadingLineBreaks ?? 2 });
  if (shouldUppercase) { builder.pushWordTransform(str => str.toUpperCase()); }
  walk(elem.children, builder);
  if (shouldUppercase) { builder.popWordTransform(); }
  builder.closeBlock({ trailingLineBreaks: formatOptions.trailingLineBreaks ?? 2 });
}
1603
-
1604
/**
 * Process a blockquote: a block whose every line is prefixed with `> `.
 *
 * Two characters of line length are reserved for the prefix. Unless
 * `formatOptions.trimEmptyLines` is exactly `false`, newline characters
 * are trimmed from the block text via `trimCharacter` before prefixing.
 * `leadingLineBreaks` / `trailingLineBreaks` default to 2 only when not
 * provided (`null`/`undefined`); an explicit `0` is respected.
 *
 * @type { FormatCallback }
 */
function formatBlockquote (elem, walk, builder, formatOptions) {
  const quoteLines = (str) => {
    const content = (formatOptions.trimEmptyLines !== false)
      ? trimCharacter(str, '\n')
      : str;
    return content
      .split('\n')
      .map(line => '> ' + line)
      .join('\n');
  };

  builder.openBlock({
    leadingLineBreaks: formatOptions.leadingLineBreaks ?? 2,
    reservedLineLength: 2
  });
  walk(elem.children, builder);
  builder.closeBlock({
    trailingLineBreaks: formatOptions.trailingLineBreaks ?? 2,
    blockTransform: quoteLines
  });
}
1623
-
1624
/**
 * Surround a string with a pair of brackets.
 *
 * @param { string } str Text to surround.
 * @param { any } brackets Falsy to return `str` unchanged; otherwise an
 * indexable pair whose string items are used as the left and right
 * brackets. A non-string item falls back to `[` or `]` respectively.
 * @returns { string }
 */
function withBrackets (str, brackets) {
  if (!brackets) { return str; }

  const bracketAt = (index, fallback) => (
    (typeof brackets[index] === 'string') ? brackets[index] : fallback
  );
  const left = bracketAt(0, '[');
  const right = bracketAt(1, ']');
  return left + str + right;
}
1635
-
1636
/**
 * Apply the optional rewriter function and base URL to a path.
 *
 * @param { string } path Original path (`src` or `href` value).
 * @param { any } rewriter When it is a function, it is called with
 * `(path, metadata, elem)` and its result replaces the path.
 * @param { string } baseUrl When truthy and the resulting path starts
 * with `/`, it is prepended to the path (with trailing slashes removed
 * by `trimCharacterEnd`).
 * @param { any } metadata Passed through to the rewriter.
 * @param { DomNode } elem Passed through to the rewriter.
 * @returns { string } The resulting path.
 */
function pathRewrite (path, rewriter, baseUrl, metadata, elem) {
  let resultPath = path;
  if (typeof rewriter === 'function') {
    resultPath = rewriter(path, metadata, elem);
  }

  const startsWithSlash = resultPath[0] === '/';
  if (startsWithSlash && baseUrl) {
    return trimCharacterEnd(baseUrl, '/') + resultPath;
  }
  return resultPath;
}
1644
-
1645
/**
 * Process an image.
 *
 * Adds inline text built from the `alt` attribute and the (rewritten,
 * bracketed) `src` attribute: the `alt` alone when there is no source,
 * the bracketed source alone when there is no `alt`, otherwise both
 * separated by a space. Word transforms are not applied to it.
 *
 * @type { FormatCallback }
 */
function formatImage (elem, walk, builder, formatOptions) {
  const attribs = elem.attribs || {};
  const alt = attribs.alt || '';

  let src = '';
  if (attribs.src) {
    src = pathRewrite(attribs.src, formatOptions.pathRewrite, formatOptions.baseUrl, builder.metadata, elem);
  }

  let text;
  if (!src) {
    text = alt;
  } else if (!alt) {
    text = withBrackets(src, formatOptions.linkBrackets);
  } else {
    text = alt + ' ' + withBrackets(src, formatOptions.linkBrackets);
  }

  builder.addInline(text, { noWordTransform: true });
}
1666
-
1667
- // a img baseUrl
1668
- // a img pathRewrite
1669
- // a img linkBrackets
1670
-
1671
- // a ignoreHref: false
1672
- // ignoreText ?
1673
- // a noAnchorUrl: true
1674
- // can be replaced with selector
1675
- // a hideLinkHrefIfSameAsText: false
1676
- // how to compare, what to show (text, href, normalized) ?
1677
- // a mailto protocol removed without options
1678
-
1679
- // a protocols: mailto, tel, ...
1680
- // can be matched with selector?
1681
-
1682
- // anchors, protocols - only if no pathRewrite fn is provided
1683
-
1684
- // normalize-url ?
1685
-
1686
- // a
1687
- // a[href^="#"] - format:skip by default
1688
- // a[href^="mailto:"] - ?
1689
-
1690
- /**
1691
- * Process an anchor.
1692
- *
1693
- * @type { FormatCallback }
1694
- */
1695
- function formatAnchor (elem, walk, builder, formatOptions) {
1696
- function getHref () {
1697
- if (formatOptions.ignoreHref) { return ''; }
1698
- if (!elem.attribs || !elem.attribs.href) { return ''; }
1699
- let href = elem.attribs.href.replace(/^mailto:/, '');
1700
- if (formatOptions.noAnchorUrl && href[0] === '#') { return ''; }
1701
- href = pathRewrite(href, formatOptions.pathRewrite, formatOptions.baseUrl, builder.metadata, elem);
1702
- return href;
1703
- }
1704
- const href = getHref();
1705
- if (!href) {
1706
- walk(elem.children, builder);
1707
- } else {
1708
- let text = '';
1709
- builder.pushWordTransform(
1710
- str => {
1711
- if (str) { text += str; }
1712
- return str;
1713
- }
1714
- );
1715
- walk(elem.children, builder);
1716
- builder.popWordTransform();
1717
-
1718
- const hideSameLink = formatOptions.hideLinkHrefIfSameAsText && href === text;
1719
- if (!hideSameLink) {
1720
- builder.addInline(
1721
- (!text)
1722
- ? href
1723
- : ' ' + withBrackets(href, formatOptions.linkBrackets),
1724
- { noWordTransform: true }
1725
- );
1726
- }
1727
- }
1728
- }
1729
-
1730
/**
 * Process a list: every non-whitespace child becomes a list item,
 * `li` children get a prefix from `nextPrefixCallback`.
 *
 * A list whose parent element is `li` is treated as nested: its prefixes
 * are trimmed at the start and it uses a single leading/trailing line
 * break. For other lists, `leadingLineBreaks` / `trailingLineBreaks`
 * default to 2 only when not provided (`null`/`undefined`); an explicit
 * `0` is respected.
 *
 * @param { DomNode } elem List items with their prefixes.
 * @param { RecursiveCallback } walk Recursive callback to process child nodes.
 * @param { BlockTextBuilder } builder Passed around to accumulate output text.
 * @param { FormatOptions } formatOptions Options specific to a formatter.
 * @param { () => string } nextPrefixCallback Function that returns increasing index each time it is called.
 */
function formatList (elem, walk, builder, formatOptions, nextPrefixCallback) {
  const isNestedList = get(elem, ['parent', 'name']) === 'li';

  // With Roman numbers, index length is not as straightforward as with Arabic numbers or letters,
  // so the dumb length comparison is the most robust way to get the correct value.
  let maxPrefixLength = 0;
  const listItems = (elem.children || [])
    // it might be more accurate to check only for html spaces here, but no significant benefit
    .filter(child => child.type !== 'text' || !/^\s*$/.test(child.data))
    .map(function (child) {
      if (child.name !== 'li') {
        return { node: child, prefix: '' };
      }
      const prefix = (isNestedList)
        ? nextPrefixCallback().trimStart()
        : nextPrefixCallback();
      if (prefix.length > maxPrefixLength) { maxPrefixLength = prefix.length; }
      return { node: child, prefix: prefix };
    });
  if (!listItems.length) { return; }

  builder.openList({
    interRowLineBreaks: 1,
    leadingLineBreaks: isNestedList ? 1 : (formatOptions.leadingLineBreaks ?? 2),
    maxPrefixLength: maxPrefixLength,
    prefixAlign: 'left'
  });

  for (const { node, prefix } of listItems) {
    builder.openListItem({ prefix: prefix });
    walk([node], builder);
    builder.closeListItem();
  }

  builder.closeList({ trailingLineBreaks: isNestedList ? 1 : (formatOptions.trailingLineBreaks ?? 2) });
}
1773
-
1774
/**
 * Process an unordered list: every item gets the same prefix,
 * `formatOptions.itemPrefix` or ` * ` when that is falsy.
 *
 * @type { FormatCallback }
 */
function formatUnorderedList (elem, walk, builder, formatOptions) {
  const itemPrefix = formatOptions.itemPrefix ? formatOptions.itemPrefix : ' * ';
  const constantPrefix = () => itemPrefix;
  return formatList(elem, walk, builder, formatOptions, constantPrefix);
}
1783
-
1784
/**
 * Process an ordered list: items are prefixed with consecutive index
 * markers followed by a dot.
 *
 * The first index comes from the `start` attribute, parsed as a base-10
 * integer; it falls back to 1 when the attribute is missing or does not
 * start with a valid integer (so that prefixes never become `NaN.`).
 * The marker style comes from the `type` attribute. A missing `attribs`
 * object is treated as having no attributes.
 *
 * @type { FormatCallback }
 */
function formatOrderedList (elem, walk, builder, formatOptions) {
  const attribs = elem.attribs || {};
  const parsedStart = Number.parseInt(attribs.start, 10);
  let nextIndex = Number.isNaN(parsedStart) ? 1 : parsedStart;
  const indexFunction = getOrderedListIndexFunction(attribs.type);
  const nextPrefixCallback = () => ' ' + indexFunction(nextIndex++) + '. ';
  return formatList(elem, walk, builder, formatOptions, nextPrefixCallback);
}
1795
-
1796
/**
 * Return a function that generates index markers of the specified format.
 *
 * `a` / `A` produce letter sequences, `i` / `I` lowercase / uppercase
 * Roman numbers; `1` and any other value produce decimal numbers.
 *
 * @param { string } [olType='1'] Marker type.
 * @returns { (i: number) => string }
 */
function getOrderedListIndexFunction (olType = '1') {
  if (olType === 'a' || olType === 'A') {
    // The marker type doubles as the base character of the sequence.
    return (i) => numberToLetterSequence(i, olType);
  }
  if (olType === 'i') {
    return (i) => numberToRoman(i).toLowerCase();
  }
  if (olType === 'I') {
    return (i) => numberToRoman(i);
  }
  return (i) => (i).toString();
}
1812
-
1813
/**
 * Given a list of class and ID selectors (prefixed with '.' and '#'),
 * return them as separate lists of names without prefixes.
 * Selectors with any other prefix are ignored.
 *
 * @param { string[] } selectors Class and ID selectors (`[".class", "#id"]` etc).
 * @returns { { classes: string[], ids: string[] } }
 */
function splitClassesAndIds (selectors) {
  const result = { classes: [], ids: [] };
  for (const selector of selectors) {
    let bucket;
    if (selector.startsWith('.')) {
      bucket = result.classes;
    } else if (selector.startsWith('#')) {
      bucket = result.ids;
    } else {
      continue;
    }
    bucket.push(selector.substring(1));
  }
  return result;
}
1832
-
1833
/**
 * Decide whether a table has to be formatted as a data table.
 *
 * @param { object } attr Attributes of the table element.
 * @param { boolean | string[] } tables `true` to treat every table as a
 * data table; otherwise a list of class and ID selectors
 * (`[".class", "#id"]`) to match the table attributes against.
 * Any other value matches nothing.
 * @returns { boolean }
 */
function isDataTable (attr, tables) {
  if (tables === true) { return true; }
  if (!attr || !Array.isArray(tables) || tables.length === 0) { return false; }

  const { classes, ids } = splitClassesAndIds(tables);
  // Attribute values are whitespace-separated token lists: split on any
  // whitespace run and drop empty tokens so they can never match.
  const tokensOf = (value) => (value || '').split(/\s+/).filter(Boolean);

  return tokensOf(attr['class']).some(x => classes.includes(x))
    || tokensOf(attr['id']).some(x => ids.includes(x));
}
1843
-
1844
/**
 * Process a table: as a data table when `isDataTable` says so for the
 * element's attributes and `builder.options.tables`, otherwise as a
 * plain block container.
 *
 * @type { FormatCallback }
 */
function formatTable (elem, walk, builder, formatOptions) {
  if (isDataTable(elem.attribs, builder.options.tables)) {
    return formatDataTable(elem, walk, builder, formatOptions);
  }
  return formatBlock(elem, walk, builder, formatOptions);
}
1854
-
1855
/**
 * Process an element as a plain block containing its children.
 * The line break options are passed to the builder as they are,
 * without any defaults applied here.
 *
 * @type { FormatCallback }
 */
function formatBlock (elem, walk, builder, formatOptions) {
  const { leadingLineBreaks } = formatOptions;
  builder.openBlock({ leadingLineBreaks });

  walk(elem.children, builder);

  const { trailingLineBreaks } = formatOptions;
  builder.closeBlock({ trailingLineBreaks });
}
1860
-
1861
/**
 * Process a data table.
 *
 * Walks the table structure: `thead`, `tbody`, `tfoot` and `center`
 * elements are descended into, every `tr` becomes a table row, its `th`
 * and `td` children become cells. Other nodes are ignored. Header cells
 * are uppercased unless `formatOptions.uppercaseHeaderCells` is exactly
 * `false`.
 *
 * @type { FormatCallback }
 */
function formatDataTable (elem, walk, builder, formatOptions) {
  const containerTags = ['thead', 'tbody', 'tfoot', 'center'];

  // A missing or invalid (or zero) span attribute counts as 1.
  const formatCell = (cellNode) => {
    const colspan = +get(cellNode, ['attribs', 'colspan']) || 1;
    const rowspan = +get(cellNode, ['attribs', 'rowspan']) || 1;
    builder.openTableCell({ maxColumnWidth: formatOptions.maxColumnWidth });
    walk(cellNode.children, builder);
    builder.closeTableCell({ colspan: colspan, rowspan: rowspan });
  };

  const formatHeaderCell = (cellNode) => {
    if (formatOptions.uppercaseHeaderCells === false) {
      formatCell(cellNode);
      return;
    }
    builder.pushWordTransform(str => str.toUpperCase());
    formatCell(cellNode);
    builder.popWordTransform();
  };

  const formatRow = (rowNode) => {
    builder.openTableRow();
    for (const cellNode of rowNode.children) {
      if (cellNode.type !== 'tag') { continue; }
      if (cellNode.name === 'th') {
        formatHeaderCell(cellNode);
      } else if (cellNode.name === 'td') {
        formatCell(cellNode);
      }
      // any other tag inside a row is ignored
    }
    builder.closeTableRow();
  };

  const walkTable = (node) => {
    if (node.type !== 'tag') { return; }
    if (containerTags.includes(node.name)) {
      node.children.forEach(walkTable);
    } else if (node.name === 'tr') {
      formatRow(node);
    }
    // any other tag is ignored
  };

  builder.openTable();
  elem.children.forEach(walkTable);
  builder.closeTable({
    tableToString: (rows) => tableToString(rows, formatOptions.rowSpacing ?? 0, formatOptions.colSpacing ?? 3),
    leadingLineBreaks: formatOptions.leadingLineBreaks,
    trailingLineBreaks: formatOptions.trailingLineBreaks
  });
}
1925
-
1926
// Frozen, prototype-less lookup table of the plain-text formatters,
// keyed by the format name used in selector definitions.
var textFormatters = /*#__PURE__*/Object.freeze(
  /*#__PURE__*/Object.assign(/*#__PURE__*/Object.create(null), {
    anchor: formatAnchor,
    blockquote: formatBlockquote,
    dataTable: formatDataTable,
    heading: formatHeading,
    horizontalLine: formatHorizontalLine,
    image: formatImage,
    lineBreak: formatLineBreak,
    orderedList: formatOrderedList,
    paragraph: formatParagraph,
    pre: formatPre,
    table: formatTable,
    unorderedList: formatUnorderedList,
    wbr: formatWbr
  })
);
1942
-
1943
/**
 * Default options.
 *
 * Built once at module load. Selector definitions that share the same
 * shape (plain blocks, headings) are produced by small local factories;
 * every definition still gets its own fresh `options` object.
 *
 * @constant
 * @type { Options }
 * @default
 * @private
 */
const DEFAULT_OPTIONS = (() => {
  // Plain block container separated by single line breaks.
  const blockSelector = (selector) => ({
    selector,
    format: 'block',
    options: { leadingLineBreaks: 1, trailingLineBreaks: 1 }
  });

  // Uppercased heading; only the leading line breaks differ between levels.
  const headingSelector = (selector, leadingLineBreaks) => ({
    selector,
    format: 'heading',
    options: { leadingLineBreaks, trailingLineBreaks: 2, uppercase: true }
  });

  return {
    baseElements: {
      selectors: [ 'body' ],
      orderBy: 'selectors', // 'selectors' | 'occurrence'
      returnDomByDefault: true
    },
    decodeEntities: true,
    encodeCharacters: {},
    formatters: {},
    limits: {
      ellipsis: '...',
      maxBaseElements: undefined,
      maxChildNodes: undefined,
      maxDepth: undefined,
      maxInputLength: (1 << 24) // 16_777_216
    },
    longWordSplit: {
      forceWrapOnLimit: false,
      wrapCharacters: []
    },
    preserveNewlines: false,
    selectors: [
      { selector: '*', format: 'inline' },
      {
        selector: 'a',
        format: 'anchor',
        options: {
          baseUrl: null,
          hideLinkHrefIfSameAsText: false,
          ignoreHref: false,
          linkBrackets: ['[', ']'],
          noAnchorUrl: true
        }
      },
      blockSelector('article'),
      blockSelector('aside'),
      {
        selector: 'blockquote',
        format: 'blockquote',
        options: { leadingLineBreaks: 2, trailingLineBreaks: 2, trimEmptyLines: true }
      },
      { selector: 'br', format: 'lineBreak' },
      blockSelector('div'),
      blockSelector('footer'),
      blockSelector('form'),
      headingSelector('h1', 3),
      headingSelector('h2', 3),
      headingSelector('h3', 3),
      headingSelector('h4', 2),
      headingSelector('h5', 2),
      headingSelector('h6', 2),
      blockSelector('header'),
      {
        selector: 'hr',
        format: 'horizontalLine',
        options: { leadingLineBreaks: 2, length: undefined, trailingLineBreaks: 2 }
      },
      {
        selector: 'img',
        format: 'image',
        options: { baseUrl: null, linkBrackets: ['[', ']'] }
      },
      blockSelector('main'),
      blockSelector('nav'),
      {
        selector: 'ol',
        format: 'orderedList',
        options: { leadingLineBreaks: 2, trailingLineBreaks: 2 }
      },
      { selector: 'p', format: 'paragraph', options: { leadingLineBreaks: 2, trailingLineBreaks: 2 } },
      { selector: 'pre', format: 'pre', options: { leadingLineBreaks: 2, trailingLineBreaks: 2 } },
      blockSelector('section'),
      {
        selector: 'table',
        format: 'table',
        options: {
          colSpacing: 3,
          leadingLineBreaks: 2,
          maxColumnWidth: 60,
          rowSpacing: 0,
          trailingLineBreaks: 2,
          uppercaseHeaderCells: true
        }
      },
      {
        selector: 'ul',
        format: 'unorderedList',
        options: { itemPrefix: ' * ', leadingLineBreaks: 2, trailingLineBreaks: 2 }
      },
      { selector: 'wbr', format: 'wbr' },
    ],
    tables: [], // deprecated
    whitespaceCharacters: ' \t\r\n\f\u200b',
    wordwrap: 80
  };
})();
2046
-
2047
// Array merge strategy: keep the accumulated items and append the new ones.
const concatMerge = (target, source, options) => [...target, ...source];

// Array merge strategy: discard the accumulated items, keep a copy of the new ones.
const overwriteMerge = (target, source, options) => [...source];

// Merge strategy for any `selectors` key. An accumulated array holding at
// least one object-typed item is the top-level selector definitions list
// and gets concatenated; otherwise (baseElements.selectors) it is overwritten.
const selectorsMerge = (target, source, options) => {
  const holdsDefinitions = target.some(item => typeof item === 'object');
  if (holdsDefinitions) {
    return concatMerge(target, source); // selectors
  }
  return overwriteMerge(target, source); // baseElements.selectors
};
2054
-
2055
/**
 * Preprocess options, compile selectors into a decision tree,
 * return a function intended for batch processing.
 *
 * The given options are merged over the defaults: arrays are overwritten,
 * except for `selectors` keys, which use `selectorsMerge`. Built-in
 * formatters are added underneath the custom ones, duplicate selector
 * definitions are merged, and deprecated options are mapped to the
 * current layout.
 *
 * @param { Options } [options = {}] HtmlToText options.
 * @returns { (html: string, metadata?: any) => string } Pre-configured converter function.
 * @static
 */
function compile (options = {}) {
  const mergeSettings = {
    arrayMerge: overwriteMerge,
    customMerge: (key) => {
      if (key === 'selectors') { return selectorsMerge; }
      return undefined;
    }
  };
  const resolved = merge(DEFAULT_OPTIONS, options, mergeSettings);

  resolved.formatters = Object.assign({}, genericFormatters, textFormatters, resolved.formatters);
  resolved.selectors = mergeDuplicatesPreferLast(resolved.selectors, (s => s.selector));

  handleDeprecatedOptions(resolved);

  return compile$1(resolved);
}
2079
-
2080
/**
 * Convert given HTML content to plain text string.
 *
 * Compiles the options on every call and applies the resulting
 * converter once.
 *
 * @param { string } html HTML content to convert.
 * @param { Options } [options = {}] HtmlToText options.
 * @param { any } [metadata] Optional metadata for HTML document, for use in formatters.
 * @returns { string } Plain text string.
 * @static
 *
 * @example
 * const { convert } = require('html-to-text');
 * const text = convert('<h1>Hello World</h1>', {
 *   wordwrap: 130
 * });
 * console.log(text); // HELLO WORLD
 */
function convert (html, options = {}, metadata = undefined) {
  const converter = compile(options);
  return converter(html, metadata);
}
2099
-
2100
/**
 * Map previously existing and now deprecated options to the new options layout.
 * This is a subject for cleanup in major releases.
 *
 * Handled options: `tags` (appended to `selectors`), `baseElement` and
 * `returnDomByDefault` (moved under `baseElements`), and `noLinkBrackets`
 * on anchor definitions (turned into `linkBrackets: false`).
 *
 * @param { Options } options HtmlToText options; modified in place.
 */
function handleDeprecatedOptions (options) {
  // Assign `value` at the nested `path`, creating intermediate objects as needed.
  const setNested = (root, path, value) => {
    const parentKeys = path.slice(0, -1);
    const valueKey = path[path.length - 1];
    let target = root;
    for (const key of parentKeys) {
      if (!target[key]) { target[key] = {}; }
      target = target[key];
    }
    target[valueKey] = value;
  };

  if (options.tags) {
    const tagDefinitions = Object.entries(options.tags).map(
      ([selector, definition]) => ({ ...definition, selector: selector || '*' })
    );
    options.selectors.push(...tagDefinitions);
    options.selectors = mergeDuplicatesPreferLast(options.selectors, (s => s.selector));
  }

  const baseElement = options['baseElement'];
  if (baseElement) {
    const baseSelectors = Array.isArray(baseElement) ? baseElement : [baseElement];
    setNested(options, ['baseElements', 'selectors'], baseSelectors);
  }

  const returnDomByDefault = options['returnDomByDefault'];
  if (returnDomByDefault !== undefined) {
    setNested(options, ['baseElements', 'returnDomByDefault'], returnDomByDefault);
  }

  for (const definition of options.selectors) {
    if (definition.format !== 'anchor') { continue; }
    if (get(definition, ['options', 'noLinkBrackets'])) {
      setNested(definition, ['options', 'linkBrackets'], false);
    }
  }
}
2146
-
2147
- export { compile, convert, convert as htmlToText };