npm - @daviddh/llm-markdown-whatsapp - Versions diffs - 0.0.1 - Mend

@daviddh/llm-markdown-whatsapp 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (138) hide show

package/.prettierrc ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "plugins": ["@trivago/prettier-plugin-sort-imports"],
+  "useTabs": false,
+  "tabWidth": 2,
+  "singleQuote": true,
+  "printWidth": 110,
+  "trailingComma": "es5",
+  "importOrder": [
+    "<THIRD_PARTY_MODULES>",
+    "^@globalUtils/(.*)$",
+    "^@src/(.*)$",
+    "^@globalTypes/(.*)$",
+    "^[./]"
+  ],
+  "importOrderSeparation": true,
+  "importOrderSortSpecifiers": true
+}

package/CLAUDE.md ADDED Viewed

@@ -0,0 +1,155 @@
+# CLAUDE.md
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+## Project Overview
+TypeScript monorepo that splits LLM-generated markdown text into WhatsApp-friendly chat message chunks. The core algorithm intelligently breaks long text at natural boundaries (questions, periods, lists, markdown sections) while preserving URLs, numbers, emails, abbreviations, parenthetical expressions, and Spanish punctuation.
+The primary use case is Latin American e-commerce customer service over WhatsApp, where LLMs generate long Spanish responses about products (Nike shoes, clothing, etc.) that need to be split into readable chat messages.
+## Commands
+```bash
+npm install                  # Install all workspace dependencies
+npm run build                # Build all packages
+npm run build:core           # Build core package only
+npm test                     # Run all tests
+npm run test:core            # Run core tests only
+npm run typecheck            # Type check all packages (tsc -b)
+npm run lint                 # ESLint
+npm run format               # Prettier
+npm run check                # Format + lint + typecheck
+# Run a single test file
+cd packages/core && NODE_OPTIONS='--experimental-vm-modules' npx jest --testPathPattern="splitChatText"
+# Watch mode for core tests
+cd packages/core && NODE_OPTIONS='--experimental-vm-modules' npx jest --watch
+```
+Note: `NODE_OPTIONS='--experimental-vm-modules'` is required because the project uses ES modules (ESM) with ts-jest, and Jest's ESM support depends on that Node flag.
+## Architecture
+**Monorepo structure:** npm workspaces with `packages/*`. Currently only `packages/core` (`@llm-markdown-whatsapp/core`) exists. The root `tsconfig.json` references additional packages (redis, e2e/*) that are not yet present.
+**Core package entry point:** `packages/core/src/index.ts` re-exports `splitChatText` from `packages/core/src/chatSplit/index.ts`, which re-exports from `splitChatText.ts`. This is the single public API function.
+### splitChatText Pipeline
+`splitChatText(text)` in `packages/core/src/chatSplit/splitChatText.ts` is the orchestrator. It accepts `string | null | undefined` and returns `string[]`.
+**1. Pre-processing** (`preProcessText`):
+- `removePeriodsAfterURLs` (`urlNormalization.ts`): Replaces `.` after URLs with `\n` (URLs never end with periods)
+- `normalizeInlineNumberedList` (`listNormalization.ts`): Detects inline patterns like `1. X 2. Y 3. Z` and adds line breaks between items. Skips already-formatted lists. Handles both colon-preceding and question-preceding patterns.
+- `normalizeInlineProductCardList` (`listNormalization.ts`): Detects inline product cards (with `🛍️` or markdown formatting + emoji indicators) and adds line breaks before each card, before emoji indicators within cards, and before trailing questions.
+**2. Main loop** — iterates while `remainingText !== ''`, trying processors in priority order. The first match wins; the remaining text is then re-evaluated from the top.
+Processor groups (in `splitChatText.ts`):
+- **`runIntroAndListProcessors`** (highest priority):
+  - `processIntroWithList` (`splitProcessors.ts`): Matches `intro:` + newline + list start (`\d. ` or `- `). If intro < 150 chars, splits after intro. Handles "Puedes responder con:" pattern specially.
+  - `processQuestionWithList` (`splitProcessors.ts`): Matches `question?\n` + numbered list. Keeps together as one chunk if total < 250 chars and >= 2 list items.
+  - `processIntroWithLongParagraphs` (`splitProcessors.ts`): Matches `intro:\n` + paragraph > 150 chars. Splits after intro.
+- **`runContentStructureProcessors`**:
+  - `processProductCardLists` (`productCardProcessor.ts`): Detects product cards by emoji pattern (`\d. 🛍️`) or markdown pattern (`\d. *Title*` + emoji indicators). Extracts intro, splits each card into its own chunk (removing the `\d.` prefix), and separates trailing questions from the last card via `extractTrailingQuestion`.
+  - `processListSection` (`listProcessor.ts`): Uses `findListSection` (`sections.ts`) to detect numbered or bullet lists. Splits numbered lists per-item if items > 150 chars or average > 70 chars with <= 3 items. Splits bullet lists per-item only if items > 150 chars. Otherwise keeps the entire list as one chunk.
+  - `processLongParagraphsAfterIntro` (`paragraphProcessor.ts`): `intro:\n` followed by multiple paragraphs where at least one > 150 chars. Splits after intro.
+  - `processLongParagraphSequence` (`paragraphProcessor.ts`): First paragraph > 150 chars with multiple lines. Splits after first paragraph (unless followed by "question with options" pattern).
+- **`runFormattingProcessors`**:
+  - `processMarkdownSection` (`paragraphProcessor.ts`): Uses `findMarkdownSection` (`sections.ts`) to detect `*Header*` or `_Header_` followed by content until next `\n\n` or end. Splits at section boundaries.
+  - `processSectionBreaks` (`breakProcessor.ts`): Splits at `\n\n` (double newline) if > 50 chars before the break. Does NOT split if: before ends with `?` and after has short intro + bullets, or before ends with "Puedes responder con:" + bullets, or after has question-with-options pattern. Also checks for markdown headers after break and long paragraphs before break.
+- **`runQuestionAndPeriodProcessors`** (fallback):
+  - `processQuestionMarks` (`questionProcessor.ts`): Finds all valid `?` positions (excluding those inside bullet lines, parentheses, or before response options). If multiple questions are "contiguous" (no `.` between them, < 50 chars gap), groups them and splits after the last `?`. For single questions: long questions (> 100 chars) split directly; short questions combine with the next sentence if combined length <= 110 chars. Handles emoji-after-question by keeping emoji with the question chunk. Does NOT split if followed by lowercase (sentence continuation).
+  - `processPeriodSplits` (`periodProcessor.ts`): Only runs if remaining text > 100 chars. Builds protected ranges (URLs, domains, emails, numbers, abbreviations, bullet points, location abbreviations like `D.C.`) and finds valid `.` positions. Skips if after-period text has a short question (< 35 chars), or if last chunk was short (< 50 chars) + current text is short (< 150 chars) + after-period is short (< 150 chars).
+**3. Post-processing**:
+- `mergeSmallChunks` (`mergeProcessor.ts`): Chunks < 20 chars merge with the next chunk. Last small chunk merges backward with previous. Respects boundaries: does not merge if current ends with `:` and next is a list or long paragraph, or if next chunk ends with `:`. Does not merge if next starts with `¿`.
+- `normalizeSpanishPunctuation` (`punctuationNormalization.ts`): For `¿` and `¡` marks: if mid-sentence (not at string start, not after `.`/`!`/`?`, not after line break), lowercases the following letter. This fixes LLM-generated text like `ayudarte ¿Cómo estás?` → `ayudarte ¿cómo estás?`.
+### Key Design Patterns
+- **Processor chain:** Each processor returns `{ splitFound: boolean, newRemainingText: string }`. The main loop tries processors in priority order; the first match wins, and the remaining text is then re-evaluated from the top.
+- **Protected ranges:** `periodProcessor.ts` builds protected ranges (URLs, emails, domains, numbers, abbreviations, bullet points) to prevent splitting inside them. Ranges are `{ start, end }` intervals. Position is protected if it falls within any range.
+- **Position helpers:** `positionHelpers.ts` — `isPositionInsideParentheses` counts open parens before position; `isPositionInBulletLine` checks if position is on a line starting with `- ` or `• `.
+- **Text helpers:** `textHelpers.ts` — `smartTrim` removes Unicode whitespace while preserving emojis; `hasTextContent` checks for alphanumeric (not just emojis/symbols); `startsWithEmoji` / `startsWithLowercase` / `findPositionAfterEmoji` for question splitting logic; `isParentheticalClarification` detects `(something)?` patterns.
+- **Section detection:** `sections.ts` — `findMarkdownSection` detects `*Header*\n` or `_Header_\n` sections; `findListSection` detects numbered and bullet lists by walking lines with state machines (`NumberedListState`, `BulletListState`).
+- **Constants centralized:** Thresholds in `constants.ts` — `MIN_CHUNK_SIZE` (20), `MAX_INTRO_LENGTH` (150), `MAX_QUESTION_WITH_OPTIONS_LENGTH` (250), `SHORT_INTRO_THRESHOLD` (50), `LONG_QUESTION_THRESHOLD` (100), `COMBINED_LENGTH_THRESHOLD` (110), `SHORT_QUESTION_FRAGMENT_THRESHOLD` (35), `MIN_CONTENT_BEFORE_BREAK` (50), `SHORT_CHUNK_THRESHOLD` (50), `CURRENT_TEXT_SHORT_THRESHOLD` (150), `AVG_ITEM_LENGTH_THRESHOLD` (70), `MAX_ITEMS_FOR_LONG_SPLIT` (3), `MAX_LIST_NUMBER` (20), `FIRST_NEWLINE_SEARCH_LIMIT` (100), `DOUBLE_NEWLINE_DISTANCE_THRESHOLD` (5). Also `splitConstants.ts` — `PERIOD_SPLIT_TEXT_THRESHOLD` (100). Several files also define local constants like `LONG_PARAGRAPH_THRESHOLD` (150).
+### File Map
+```
+packages/core/src/
+├── index.ts                         # Public API re-export
+├── chatSplit/
+│   ├── index.ts                     # Re-exports splitChatText
+│   ├── splitChatText.ts             # Main orchestrator: pre-process → processor loop → post-process
+│   ├── splitProcessors.ts           # Intro+list, question+list, intro+long-paragraphs processors
+│   ├── productCardProcessor.ts      # Product card detection (🛍️ emoji or *Title* markdown patterns)
+│   ├── listProcessor.ts             # Numbered/bullet list chunking (per-item if items are huge)
+│   ├── paragraphProcessor.ts        # Long paragraph sequences, markdown section detection
+│   ├── breakProcessor.ts            # Double newline (section break) splitting
+│   ├── questionProcessor.ts         # Question mark splitting with contiguous question grouping
+│   ├── periodProcessor.ts           # Period splitting with protected ranges (URLs, emails, numbers, etc.)
+│   ├── mergeProcessor.ts            # Post-processing: merge chunks < 20 chars with neighbors
+│   ├── sections.ts                  # Markdown section and list section boundary detection (state machines)
+│   ├── textHelpers.ts               # smartTrim, hasTextContent, emoji detection, lowercase detection
+│   ├── positionHelpers.ts           # Parentheses depth counting, bullet line detection
+│   ├── listNormalization.ts         # Pre-processing: inline numbered list and product card normalization
+│   ├── urlNormalization.ts          # Pre-processing: remove periods after URLs
+│   ├── punctuationNormalization.ts  # Post-processing: Spanish ¿/¡ capitalization normalization
+│   ├── constants.ts                 # All threshold constants (centralized)
+│   └── splitConstants.ts            # PERIOD_SPLIT_TEXT_THRESHOLD constant
+└── __tests__/
+    └── strs.splitChatText.test.ts   # 40+ scenario-based tests with exact chunk matching
+```
+### Shared Interface
+All processors share the `SplitResult` interface (defined in `splitProcessors.ts`):
+```typescript
+interface SplitResult {
+  splitFound: boolean;
+  newRemainingText: string;
+}
+```
+## Code Style & Rules
+- **ESLint config** (`eslint.config.mjs`): `eslint-config-love` + `typescript-eslint` recommended + strict custom rules:
+  - `max-lines-per-function: 40` (skip blanks/comments)
+  - `max-depth: 2`
+  - `max-lines: 300` per file
+  - `curly: multi-line`
+- When hitting max-lines/max-lines-per-function, extract helper functions or split into separate files. Never compress statements onto single lines.
+- Never use `any` type — always use explicit TypeScript types.
+- Never disable ESLint rules (no eslint-disable comments or config modifications).
+- **Prettier** (`.prettierrc`): single quotes, 110 print width, trailing commas (es5), 2-space indentation (spaces, not tabs), import sorting via `@trivago/prettier-plugin-sort-imports`.
+- **Import order** (enforced by Prettier plugin): third-party modules → `@globalUtils/*` → `@src/*` → `@globalTypes/*` → relative imports (with blank line separation).
+- **ESM throughout:** `"type": "module"` in all package.json files. Imports use `.js` extensions (TypeScript ESM convention).
+- **Regex:** Uses the `v` flag (Unicode sets) consistently across all regex patterns.
+- **Constants style:** Numeric constants are extracted as named `const` variables (`ZERO`, `NOT_FOUND`, `INDEX_OFFSET`, `INCREMENT`, etc.) throughout all files. New code should follow this pattern.
+## Testing
+Tests are in `packages/core/src/__tests__/strs.splitChatText.test.ts`. The test suite has 40+ scenario-based tests organized in `describe` blocks:
+- Basic question splitting, contiguous questions, period splitting, smart question-period combination
+- URL/link protection, number/price protection, email protection
+- Edge cases (empty input, null/undefined, emojis, markdown formatting, abbreviations, version numbers)
+- Real-world scenarios (22 numbered tests from actual WhatsApp conversations in Spanish)
+- Spanish punctuation normalization (¿ and ¡ capitalization)
+- Parentheses protection
+Tests verify chunk boundaries exactly with `toEqual`. Some tests use structural assertions (`toContain`, `toBe(true/false)`) for URL/domain integrity checks. Jest is configured with `ts-jest` ESM preset and `--experimental-vm-modules`.
+## TypeScript Config
+- Target: ES2024, Module: NodeNext, moduleResolution: NodeNext
+- `strict: true`, `noUncheckedIndexedAccess: true`, `isolatedModules: true`
+- Build uses `tsconfig.build.json` (extends `tsconfig.json`, excludes `__tests__/`) + `tsc-alias` for path alias resolution
+- Path aliases configured in root `jest.config.js`: `@globalUtils/*`, `@src/*`, `@globalTypes/*` (though core package doesn't currently use path aliases)

package/README.md ADDED Viewed

@@ -0,0 +1,304 @@
+<p align="center">
+  <h1 align="center">LLM Markdown WhatsApp</h1>
+  <p align="center">
+    A TypeScript library that splits LLM-generated markdown into WhatsApp-friendly chat message chunks.
+  </p>
+</p>
+<div align="center">
+[![TypeScript](https://img.shields.io/badge/TypeScript-5.0+-3178C6?style=flat-square&logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
+[![Node.js](https://img.shields.io/badge/Node.js-18+-339933?style=flat-square&logo=node.js&logoColor=white)](https://nodejs.org/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow?style=flat-square)](https://opensource.org/licenses/MIT)
+</div>
+<p align="center">
+  <a href="#the-problem">Problem</a> •
+  <a href="#quickstart">Quickstart</a> •
+  <a href="#features">Features</a> •
+  <a href="#splitting-rules">Splitting Rules</a> •
+  <a href="#api-reference">API Reference</a>
+</p>
+---
+## The Problem
+LLMs generate long, structured markdown responses—paragraphs, numbered lists, product cards, nested bullet points. Sending these as a single WhatsApp message creates a wall of text that users won't read.
+Naively splitting at character limits breaks mid-sentence, mid-list, or mid-URL. Splitting at every period creates fragmented messages that feel robotic. Neither approach understands the structure of the content.
+Additionally:
+- **URLs, emails, and numbers contain periods.** Splitting at `Nike.com.co` or `$1.000.000` or `juan.perez@gmail.com` produces broken fragments.
+- **Lists should stay together.** A numbered list of products or a bullet list of options is a single logical unit—splitting inside an item destroys readability.
+- **Questions need context.** A short trailing question like "¿Te interesa?" should stay attached to the preceding sentence, not become its own tiny message.
+This library handles all of this. One function call, zero configuration. Pass in the LLM's markdown output, get back an array of WhatsApp-ready message chunks.
+## How It Works
+The library takes a markdown string and splits it into an array of smaller chunks optimized for chat readability. It applies a priority-ordered chain of processors:
+1. Pre-processes text (normalizes inline lists, removes periods after URLs)
+2. Tries structural splits first (intro + list, product cards, markdown sections, double newlines)
+3. Falls back to semantic splits (question marks, periods) with intelligent protection
+4. Merges chunks that are too small (<20 chars) with their neighbors
+5. Normalizes Spanish punctuation (¿/¡ capitalization rules)
+## Features
+| Feature                       | Description                                                                    |
+| ----------------------------- | ------------------------------------------------------------------------------ |
+| **Smart Question Splitting**  | Splits at question marks while keeping contiguous questions together            |
+| **List Preservation**         | Keeps numbered and bullet lists intact, splits only when items are very long    |
+| **Product Card Detection**    | Recognizes product card patterns (with emojis or markdown) and splits per card  |
+| **URL/Email/Number Safety**   | Never splits inside URLs, emails, domain names, or formatted numbers           |
+| **Parentheses Protection**    | Avoids splitting inside parenthetical expressions                              |
+| **Abbreviation Awareness**    | Protects periods in `etc.`, `Dr.`, `D.C.`, `S.A.`, version numbers            |
+| **Spanish Punctuation**       | Normalizes capitalization after mid-sentence ¿ and ¡ marks                     |
+| **Small Chunk Merging**       | Prevents tiny fragments by merging small chunks with adjacent ones             |
+| **Markdown Section Support**  | Splits at markdown headers (`*Title*` or `_Title_`) as natural boundaries      |
+| **Zero Configuration**        | Single function, no setup required—just pass text, get chunks                  |
+## Quickstart
+```bash
+npm install @llm-markdown-whatsapp/core
+```
+### Basic Usage
+```typescript
+import { splitChatText } from '@llm-markdown-whatsapp/core';
+const llmResponse = 'Thanks for reaching out. I understand your situation and I want to help you resolve it in the best way possible. You can send your product back at no extra cost. Would you prefer a full refund or an exchange for a different model?';
+const chunks = splitChatText(llmResponse);
+console.log(chunks);
+// [
+//   'Thanks for reaching out.',
+//   'I understand your situation and I want to help you resolve it in the best way possible.',
+//   'You can send your product back at no extra cost.',
+//   'Would you prefer a full refund or an exchange for a different model?',
+// ]
+```
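+### Lists Stay Together
+Lists are kept as a single chunk by default. When a list contains very long items (over 150 characters), as in this example, it is split into one message per item so that no item is ever broken in the middle: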
+```typescript
+const llmResponse = `I found these options:
+- Nike Pegasus Plus – High-performance running shoes for marathons and daily runs, featuring ZoomX Foam cushioning and a Flyknit upper that adapts to your foot. Available in black and a multicolor combination.
+- Nike Air Max 90 – Classic model with a waffle sole and the iconic visible Air cushioning, in neutral tones like light bone/olive/university grey.
+Which of these models interests you the most? 😊`;
+const chunks = splitChatText(llmResponse);
+// [
+//   'I found these options:',
+//   '- Nike Pegasus Plus – High-performance running shoes for marathons...',
+//   '- Nike Air Max 90 – Classic model with a waffle sole...',
+//   'Which of these models interests you the most? 😊',
+// ]
+```
+### Product Cards Split Per Card
+```typescript
+const llmResponse = `I found these options:
+1. 🛍️  Pegasus Plus Shoes: 💵 $1.015.000
+📏 Color: Black, Glacier Blue/Mint Foam/Impact Green/Black.
+📏 Shoe Size: 43, 41, 38.
+✅ Ultra-lightweight, with ZoomX cushioning and great breathability.
+2. 🛍️  ISPA Sense Shoes: 💵 $804.900
+📏 Shoe Size: 38, 39, 40, 41, 42, 43.
+✅ Casual style with great comfort for daily use.
+Which of these products do you like?`;
+const chunks = splitChatText(llmResponse);
+// [
+//   'I found these options:',
+//   '🛍️  Pegasus Plus Shoes: 💵 $1.015.000\n📏 Color: ...\n✅ Ultra-lightweight...',
+//   '🛍️  ISPA Sense Shoes: 💵 $804.900\n📏 Shoe Size: ...\n✅ Casual style...',
+//   'Which of these products do you like?',
+// ]
+```
+---
+## Splitting Rules
+The library applies processors in priority order. The first processor that finds a valid split point wins, and the remaining text is re-evaluated from the top.
+### Structural Splits (highest priority)
+| Pattern                    | Behavior                                                                             |
+| -------------------------- | ------------------------------------------------------------------------------------ |
+| **Intro + List**           | Text ending with `:` followed by a numbered/bullet list splits after the intro       |
+| **Question + Numbered List** | Short question followed by numbered options stays together as one chunk             |
+| **Product Cards**          | Numbered items with `🛍️` or `*Title*` formatting split into one chunk per card       |
+| **List Sections**          | Numbered/bullet lists kept as one chunk; split per-item when items are >150 chars (numbered lists also when ≤3 items average >70 chars) |
+| **Markdown Sections**      | `*Header*` or `_Header_` with content splits at section boundaries                   |
+| **Section Breaks**         | Double newlines (`\n\n`) act as natural split points                                 |
+### Semantic Splits (fallback)
+| Pattern                    | Behavior                                                                             |
+| -------------------------- | ------------------------------------------------------------------------------------ |
+| **Question Marks**         | Splits after `?` unless followed by lowercase (sentence continuation); an emoji right after `?` stays with the question |
+| **Contiguous Questions**   | Multiple questions without periods between them stay together                        |
+| **Period Splits**          | Splits at `.` for text >100 chars, skipping protected positions                      |
+### Protected Content (never split inside)
+| Content                    | Examples                                                                             |
+| -------------------------- | ------------------------------------------------------------------------------------ |
+| **URLs**                   | `https://example.com/path`, `www.site.com`                                           |
+| **Plain Domains**          | `Nike.com.co`, `shop.example.co.uk`                                                  |
+| **Emails**                 | `juan.perez@gmail.com`                                                               |
+| **Formatted Numbers**      | `$1.000.000`, `2.5.1`, `15.5`                                                       |
+| **Abbreviations**          | `etc.`, `Dr.`, `D.C.`, `S.A.`, `E.U.A.`                                             |
+| **Parenthetical Expressions** | `(calle, número, referencia, etc.)`                                               |
+| **Bullet Point Content**   | Content within `- item` or `• item` lines                                            |
+### Post-processing
+- **Small Chunk Merging:** Chunks under 20 characters merge with the next chunk (or previous, if last).
+- **Spanish Punctuation:** After mid-sentence `¿` or `¡` (not at start or after `.`/`!`/`?`), the following letter is lowercased. Example: `ayudarte ¿Cómo estás?` becomes `ayudarte ¿cómo estás?`.
+---
+## API Reference
+### `splitChatText(text)`
+```typescript
+function splitChatText(text: string | null | undefined): string[]
+```
+Splits a markdown text string into an array of chat-ready chunks.
+- **Input:** A string of markdown text (typically an LLM response). Accepts `null` or `undefined` safely.
+- **Output:** An array of strings, each suitable for sending as an individual WhatsApp message.
+- Returns `[]` for `null`, `undefined`, or empty string.
+```typescript
+import { splitChatText } from '@llm-markdown-whatsapp/core';
+const chunks = splitChatText(llmMarkdownText);
+```
+## Project Structure
+```
+llm-markdown-whatsapp/
+├── packages/
+│   └── core/                    # Core splitting library
+│       └── src/
+│           ├── index.ts         # Public API — exports splitChatText
+│           └── chatSplit/
+│               ├── splitChatText.ts          # Main orchestrator
+│               ├── splitProcessors.ts        # Intro + list processors
+│               ├── productCardProcessor.ts   # Product card detection and splitting
+│               ├── listProcessor.ts          # Numbered/bullet list processing
+│               ├── paragraphProcessor.ts     # Long paragraph and markdown sections
+│               ├── breakProcessor.ts         # Double newline section breaks
+│               ├── questionProcessor.ts      # Question mark splitting logic
+│               ├── periodProcessor.ts        # Period splitting with protected ranges
+│               ├── mergeProcessor.ts         # Small chunk merging
+│               ├── sections.ts              # Markdown/list section detection
+│               ├── textHelpers.ts           # Smart trim, emoji detection, text utilities
+│               ├── positionHelpers.ts       # Parentheses/bullet position checks
+│               ├── listNormalization.ts     # Inline list normalization
+│               ├── urlNormalization.ts       # URL period removal
+│               ├── punctuationNormalization.ts  # Spanish ¿/¡ capitalization
+│               ├── constants.ts             # Threshold constants
+│               └── splitConstants.ts        # Split-specific constants
+└── README.md
+```
+## Architecture
+```mermaid
+flowchart TB
+    subgraph Input["Input"]
+        T["LLM markdown text"]
+    end
+    Input --> Pre
+    subgraph Pre["Pre-processing"]
+        direction LR
+        A["Normalize<br/>inline lists"]
+        B["Normalize<br/>product cards"]
+        C["Remove periods<br/>after URLs"]
+    end
+    Pre --> Processors
+    subgraph Processors["Processor Chain (priority order)"]
+        direction TB
+        P1["Intro + List<br/>Question + List<br/>Intro + Long Paragraphs"]
+        P2["Product Cards<br/>List Sections<br/>Long Paragraphs"]
+        P3["Markdown Sections<br/>Section Breaks (double newlines)"]
+        P4["Question Marks<br/>Period Splits"]
+    end
+    Processors --> Post
+    subgraph Post["Post-processing"]
+        direction LR
+        D["Merge small<br/>chunks"]
+        E["Normalize Spanish<br/>punctuation"]
+    end
+    Post --> Output
+    subgraph Output["Output"]
+        O["string[ ] — array of chat-ready chunks"]
+    end
+```
+---
+## Contributing
+Contributions are welcome! Please:
+1. Fork the repository
+2. Create a feature branch (`git checkout -b feature/amazing-feature`)
+3. Write tests for your changes
+4. Ensure all tests pass (`npm test`)
+5. Ensure types check (`npm run typecheck`)
+6. Commit with a clear message
+7. Open a Pull Request
+## Development
+```bash
+git clone <repository-url>
+cd llm-markdown-whatsapp
+npm install
+npm run build          # Build all packages
+npm test               # Run tests
+npm run typecheck      # Type check
+npm run lint           # Lint
+npm run check          # Format + lint + typecheck
+```
+## License
+MIT License - see [LICENSE](LICENSE) for details.
+---
+<p align="center">
+  Built with TypeScript • Zero Dependencies • WhatsApp-Optimized Chat Splitting
+</p>

package/eslint.config.mjs ADDED Viewed

@@ -0,0 +1,28 @@
+import js from '@eslint/js';
+import love from 'eslint-config-love';
+import { defineConfig } from 'eslint/config';
+import globals from 'globals';
+import tseslint from 'typescript-eslint';
+export default defineConfig([
+  {
+    ignores: ['coverage/**', 'dist/**', '**/dist/**', 'node_modules/**', '**/node_modules/**', '*.config.js', '*.config.ts', 'examples/**'],
+  },
+  { files: ['**/*.{js,mjs,cjs,ts,mts,cts}'], plugins: { js }, extends: ['js/recommended'] },
+  { files: ['**/*.{js,mjs,cjs,ts,mts,cts}'], languageOptions: { globals: globals.node } },
+  {
+    ...love,
+    files: ['**/*.{ts,mts,cts}'],
+  },
+  tseslint.configs.recommended,
+  {
+    files: ['**/*.{js,mjs,cjs,ts,mts,cts}'],
+    rules: {
+      // Our custom rules (preserved)
+      'max-lines-per-function': ['error', { max: 40, skipBlankLines: true, skipComments: true }],
+      'max-depth': ['error', { max: 2 }],
+      'max-lines': ['error', { max: 300, skipBlankLines: false, skipComments: true }],
+      curly: ['error', 'multi-line'],
+    },
+  },
+]);

package/jest.config.js ADDED Viewed

@@ -0,0 +1,40 @@
+const config = {
+  clearMocks: true,
+  collectCoverage: true,
+  testTimeout: 120000,
+  coverageDirectory: 'coverage',
+  coverageProvider: 'v8',
+  extensionsToTreatAsEsm: ['.ts'],
+  moduleNameMapper: {
+    '^@globalTypes/(.*)\\.js$': '<rootDir>/src/types/$1',
+    '^@globalUtils/(.*)\\.js$': '<rootDir>/src/utils/$1',
+    '^@src/(.*)\\.js$': '<rootDir>/src/$1',
+    '^@globalTypes/(.*)$': '<rootDir>/src/types/$1',
+    '^@globalUtils/(.*)$': '<rootDir>/src/utils/$1',
+    '^@src/(.*)$': '<rootDir>/src/$1',
+    '^(\\.{1,2}/.*)\\.js$': '$1',
+  },
+  preset: 'ts-jest/presets/default-esm',
+  testEnvironment: 'node',
+  testMatch: ['**/__tests__/**/*.test.ts', '**/__tests__/**/*.spec.ts'],
+  transform: {
+    '^.+\\.tsx?$': [
+      'ts-jest',
+      {
+        useESM: true,
+        tsconfig: {
+          module: 'NodeNext',
+          moduleResolution: 'nodenext',
+          target: 'ES2024',
+          allowSyntheticDefaultImports: true,
+          esModuleInterop: true,
+          allowImportingTsExtensions: true,
+          isolatedModules: true,
+        },
+      },
+    ],
+  },
+  transformIgnorePatterns: ['/node_modules/', '\\.pnp\\.[^\\/]+$'],
+};
+export default config;

package/package.json ADDED Viewed

@@ -0,0 +1,61 @@
+{
+  "name": "@daviddh/llm-markdown-whatsapp",
+  "version": "0.0.1",
+  "private": false,
+  "description": "Transforms Markdown into WhatsApp text format monorepo",
+  "keywords": [
+    "llm",
+    "whatsapp",
+    "markdown",
+    "formatter",
+    "styling",
+    "utils",
+    "strs",
+    "strings",
+    "converter",
+    "transformer"
+  ],
+  "homepage": "https://github.com/daviddominguezh/llm-markdown-whatsapp#readme",
+  "bugs": {
+    "url": "https://github.com/daviddominguezh/llm-markdown-whatsapp/issues"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/daviddominguezh/llm-markdown-whatsapp.git"
+  },
+  "license": "MIT",
+  "author": "David Dominguez",
+  "type": "module",
+  "main": "packages/core/dist/index.js",
+  "workspaces": [
+    "packages/*"
+  ],
+  "scripts": {
+    "typecheck": "tsc -b",
+    "lint": "eslint .",
+    "format": "prettier --write \"**/*.{js,ts,json}\"",
+    "check": "npm run format && npm run lint && npm run typecheck",
+    "build": "npm run build --workspaces",
+    "build:core": "npm run build -w @llm-markdown-whatsapp/core",
+    "test": "npm run test --workspaces --if-present",
+    "test:core": "npm run test -w @llm-markdown-whatsapp/core"
+  },
+  "devDependencies": {
+    "@eslint/js": "^9.28.0",
+    "@trivago/prettier-plugin-sort-imports": "^5.2.2",
+    "@types/jest": "^30.0.0",
+    "eslint": "^9.28.0",
+    "eslint-config-love": "^144.0.0",
+    "globals": "^16.2.0",
+    "jest": "^30.0.5",
+    "prettier": "^3.8.1",
+    "ts-jest": "^29.4.4",
+    "tsc-alias": "^1.8.10",
+    "tsx": "^4.19.4",
+    "typescript": "^5.8.3",
+    "typescript-eslint": "^8.33.1"
+  },
+  "engines": {
+    "node": ">=18.0.0"
+  }
+}

package/packages/core/dist/__tests__/splitChatText.basic.test.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export {};
2	+ //# sourceMappingURL=splitChatText.basic.test.d.ts.map

package/packages/core/dist/__tests__/splitChatText.basic.test.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"splitChatText.basic.test.d.ts","sourceRoot":"","sources":["../../src/__tests__/splitChatText.basic.test.ts"],"names":[],"mappings":""}