@tpmjs/official-regex-extract 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +183 -0
- package/dist/index.d.ts +38 -0
- package/dist/index.js +69 -0
- package/package.json +89 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024-2025 TPMJS
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# @tpmjs/official-regex-extract
|
|
2
|
+
|
|
3
|
+
Extract all regex matches from text with optional capture group support.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @tpmjs/official-regex-extract
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```typescript
|
|
14
|
+
import { regexExtractTool } from '@tpmjs/official-regex-extract';
|
|
15
|
+
import { generateText } from 'ai';
|
|
16
|
+
|
|
17
|
+
const result = await generateText({
|
|
18
|
+
model: yourModel,
|
|
19
|
+
tools: {
|
|
20
|
+
regexExtract: regexExtractTool,
|
|
21
|
+
},
|
|
22
|
+
prompt: 'Extract all email addresses from the text',
|
|
23
|
+
});
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Parameters
|
|
27
|
+
|
|
28
|
+
- `text` (string, required): The text to search for matches
|
|
29
|
+
- `pattern` (string, required): Regular expression pattern (without delimiters)
|
|
30
|
+
- `flags` (string, optional): Regular expression flags
|
|
31
|
+
- `g` - global (automatically added if not present)
|
|
32
|
+
- `i` - case-insensitive
|
|
33
|
+
- `m` - multiline
|
|
34
|
+
- `s` - dotAll (. matches newlines)
|
|
35
|
+
- `u` - unicode
|
|
36
|
+
- `y` - sticky
|
|
37
|
+
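- `groups` (boolean, optional): If true, return a detailed object for each match containing the matched text, its named capture groups, and its position in the text. If false, return only the matched strings. Default: false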
|
|
38
|
+
|
|
39
|
+
## Returns
|
|
40
|
+
|
|
41
|
+
```typescript
|
|
42
|
+
{
|
|
43
|
+
matches: string[] | MatchWithGroups[]; // Array of matches
|
|
44
|
+
matchCount: number; // Total number of matches
|
|
45
|
+
hasMatches: boolean; // Whether any matches were found
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// When groups=true, each match is:
|
|
49
|
+
{
|
|
50
|
+
match: string; // The full matched text
|
|
51
|
+
groups: Record<string, string | undefined>; // Named capture groups ({} if the pattern defines none)
|
|
52
|
+
index: number; // Position in text
|
|
53
|
+
}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Examples
|
|
57
|
+
|
|
58
|
+
### Extract email addresses
|
|
59
|
+
|
|
60
|
+
```typescript
|
|
61
|
+
const result = await regexExtractTool.execute({
|
|
62
|
+
text: 'Contact us at support@example.com or sales@example.com',
|
|
63
|
+
pattern: '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}',
|
|
64
|
+
flags: 'i',
|
|
65
|
+
});
|
|
66
|
+
// {
|
|
67
|
+
// matches: ['support@example.com', 'sales@example.com'],
|
|
68
|
+
// matchCount: 2,
|
|
69
|
+
// hasMatches: true
|
|
70
|
+
// }
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Extract URLs with capture groups
|
|
74
|
+
|
|
75
|
+
```typescript
|
|
76
|
+
const result = await regexExtractTool.execute({
|
|
77
|
+
text: 'Visit https://example.com and http://test.org',
|
|
78
|
+
pattern: '(?<protocol>https?)://(?<domain>[^\\s]+)',
|
|
79
|
+
groups: true,
|
|
80
|
+
});
|
|
81
|
+
// {
|
|
82
|
+
// matches: [
|
|
83
|
+
// {
|
|
84
|
+
// match: 'https://example.com',
|
|
85
|
+
// groups: { protocol: 'https', domain: 'example.com' },
|
|
86
|
+
// index: 6
|
|
87
|
+
// },
|
|
88
|
+
// {
|
|
89
|
+
// match: 'http://test.org',
|
|
90
|
+
// groups: { protocol: 'http', domain: 'test.org' },
|
|
91
|
+
// index: 30
|
|
92
|
+
// }
|
|
93
|
+
// ],
|
|
94
|
+
// matchCount: 2,
|
|
95
|
+
// hasMatches: true
|
|
96
|
+
// }
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Extract phone numbers
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
const result = await regexExtractTool.execute({
|
|
103
|
+
text: 'Call (555) 123-4567 or (555) 987-6543',
|
|
104
|
+
pattern: '\\(\\d{3}\\)\\s\\d{3}-\\d{4}',
|
|
105
|
+
});
|
|
106
|
+
// {
|
|
107
|
+
// matches: ['(555) 123-4567', '(555) 987-6543'],
|
|
108
|
+
// matchCount: 2,
|
|
109
|
+
// hasMatches: true
|
|
110
|
+
// }
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Extract hashtags (case-insensitive)
|
|
114
|
+
|
|
115
|
+
```typescript
|
|
116
|
+
const result = await regexExtractTool.execute({
|
|
117
|
+
text: 'Love #JavaScript and #TypeScript!',
|
|
118
|
+
pattern: '#\\w+',
|
|
119
|
+
flags: 'i',
|
|
120
|
+
});
|
|
121
|
+
// {
|
|
122
|
+
// matches: ['#JavaScript', '#TypeScript'],
|
|
123
|
+
// matchCount: 2,
|
|
124
|
+
// hasMatches: true
|
|
125
|
+
// }
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Extract dates with named groups
|
|
129
|
+
|
|
130
|
+
```typescript
|
|
131
|
+
const result = await regexExtractTool.execute({
|
|
132
|
+
text: 'Event on 2024-03-15 and deadline 2024-06-30',
|
|
133
|
+
pattern: '(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})',
|
|
134
|
+
groups: true,
|
|
135
|
+
});
|
|
136
|
+
// {
|
|
137
|
+
// matches: [
|
|
138
|
+
// {
|
|
139
|
+
// match: '2024-03-15',
|
|
140
|
+
// groups: { year: '2024', month: '03', day: '15' },
|
|
141
|
+
// index: 9
|
|
142
|
+
// },
|
|
143
|
+
// {
|
|
144
|
+
// match: '2024-06-30',
|
|
145
|
+
// groups: { year: '2024', month: '06', day: '30' },
|
|
146
|
+
// index: 33
|
|
147
|
+
// }
|
|
148
|
+
// ],
|
|
149
|
+
// matchCount: 2,
|
|
150
|
+
// hasMatches: true
|
|
151
|
+
// }
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## Use Cases
|
|
155
|
+
|
|
156
|
+
- Extracting email addresses from text
|
|
157
|
+
- Finding URLs in content
|
|
158
|
+
- Parsing phone numbers
|
|
159
|
+
- Extracting hashtags or mentions
|
|
160
|
+
- Finding dates in various formats
|
|
161
|
+
- Extracting code snippets or code blocks
|
|
162
|
+
- Parsing structured data from text
|
|
163
|
+
- Finding price values
|
|
164
|
+
- Extracting IP addresses
|
|
165
|
+
- Validating and extracting specific patterns
|
|
166
|
+
|
|
167
|
+
## Tips
|
|
168
|
+
|
|
169
|
+
- The `g` (global) flag is automatically added to find all matches
|
|
170
|
+
- Use named capture groups `(?<name>...)` for clearer results when `groups=true`
|
|
171
|
+
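- Escape special regex characters with a backslash when they should match literally: `\` `^` `$` `.` `*` `+` `?` `(` `)` `[` `]` `{` `}` `|` (inside a JavaScript string literal the backslash itself must be doubled, e.g. `'\\.'`)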
|
|
172
|
+
- Use `i` flag for case-insensitive matching
|
|
173
|
+
- Use the `m` flag when `^` and `$` should match at the start and end of every line rather than only at the start and end of the whole text
|
|
174
|
+
|
|
175
|
+
## Error Handling
|
|
176
|
+
|
|
177
|
+
The tool throws an error if:
|
|
178
|
+
- The pattern is not valid regular expression syntax, or `flags` contains an unsupported or repeated flag
|
|
179
|
+
- Text or pattern is not a string
|
|
180
|
+
|
|
181
|
+
## License
|
|
182
|
+
|
|
183
|
+
MIT
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import * as ai from 'ai';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Regex Extract Tool for TPMJS
|
|
5
|
+
* Extracts all regex matches from text with optional capture group support
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Arguments accepted by the regex extract tool.
 */
interface RegexExtractInput {
    /** Text that is searched for matches. */
    text: string;
    /** Regular expression source, written without surrounding delimiters. */
    pattern: string;
    /** Optional regular expression flags (for example `i`, `m`, `s`, `u`, `y`). */
    flags?: string;
    /** Optional switch: when true, matches are reported as objects instead of plain strings. */
    groups?: boolean;
}
|
|
16
|
+
/**
 * One match reported in detailed form: the matched text, its named
 * capture groups, and where it was found.
 */
interface MatchWithGroups {
    /** The full matched text. */
    match: string;
    /** Named capture groups keyed by group name; a value may be undefined. */
    groups: {
        [name: string]: string | undefined;
    };
    /** Position of the match within the searched text. */
    index: number;
}
|
|
24
|
+
/**
 * Value produced by the regex extract tool.
 */
interface RegexExtractResult {
    /** Every match found: plain strings, or detailed match objects. */
    matches: Array<string> | Array<MatchWithGroups>;
    /** Number of entries in `matches`. */
    matchCount: number;
    /** True when at least one match was found. */
    hasMatches: boolean;
}
|
|
32
|
+
/**
 * Regex Extract Tool.
 *
 * Extracts all matches from text using a regular expression pattern.
 * Takes a {@link RegexExtractInput} and produces a {@link RegexExtractResult}.
 * The same binding is available as both the named export `regexExtractTool`
 * and the default export.
 */
declare const regexExtractTool: ai.Tool<RegexExtractInput, RegexExtractResult>;

// RegexExtractInput and MatchWithGroups are exported as types as well: both
// appear in the public surface (the tool's input type and the element type of
// RegexExtractResult.matches), so consumers need to be able to name them.
export { type MatchWithGroups, type RegexExtractInput, type RegexExtractResult, regexExtractTool as default, regexExtractTool };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { tool, jsonSchema } from 'ai';
|
|
2
|
+
|
|
3
|
+
// src/index.ts
|
|
4
|
+
/**
 * Regex Extract Tool.
 *
 * Runs a regular expression over `text` and reports every match.
 *
 * Input:
 *   - text    (string, required)  text to search
 *   - pattern (string, required)  regular expression source without delimiters
 *   - flags   (string, optional)  regular expression flags; "g" is appended when
 *                                 absent; null is treated the same as omitted
 *   - groups  (boolean, optional) when truthy, each match is reported as
 *                                 { match, groups, index } instead of a string
 *
 * Returns { matches, matchCount, hasMatches }.
 *
 * Throws an Error when text, pattern, or flags is not a string, and when the
 * RegExp cannot be constructed or applied (the underlying message is appended
 * to "Failed to extract regex matches: ").
 */
var regexExtractTool = tool({
  description: "Extract all regex matches from text. Supports regex flags (g, i, m, s, u, y) and optional capture group extraction. Returns all matches with their positions and capture groups if requested. Useful for parsing structured text, extracting patterns, or validating formats.",
  inputSchema: jsonSchema({
    type: "object",
    properties: {
      text: {
        type: "string",
        description: "The text to search for matches"
      },
      pattern: {
        type: "string",
        description: "Regular expression pattern (without delimiters)"
      },
      flags: {
        type: "string",
        description: 'Regular expression flags (g=global, i=case-insensitive, m=multiline, s=dotAll, u=unicode, y=sticky). The "g" flag is added automatically if not present.'
      },
      groups: {
        type: "boolean",
        description: "If true, return matches as objects with capture groups and positions. If false, return simple string array of full matches. (default: false)"
      }
    },
    required: ["text", "pattern"],
    additionalProperties: false
  }),
  execute: async ({ text, pattern, flags = "", groups = false }) => {
    if (typeof text !== "string") {
      throw new Error("Text must be a string");
    }
    if (typeof pattern !== "string") {
      throw new Error("Pattern must be a string");
    }
    // text and pattern are type-checked above, so flags gets the same
    // treatment. The parameter default only covers undefined, so an explicit
    // null is mapped to "no flags" here, and any other non-string is rejected
    // with a clear message instead of failing inside `.includes` below.
    const requestedFlags = flags ?? "";
    if (typeof requestedFlags !== "string") {
      throw new Error("Flags must be a string");
    }
    // String.prototype.matchAll requires a global regex, so "g" is always present.
    const finalFlags = requestedFlags.includes("g") ? requestedFlags : requestedFlags + "g";
    try {
      const regex = new RegExp(pattern, finalFlags);
      const allMatches = Array.from(text.matchAll(regex));
      const matches = groups
        ? allMatches.map((match) => ({
            match: match[0],
            // match.groups holds named groups only; it is undefined when the
            // pattern defines none, in which case an empty object is reported.
            groups: match.groups || {},
            index: match.index ?? 0
          }))
        : allMatches.map((match) => match[0]);
      return {
        matches,
        matchCount: matches.length,
        hasMatches: matches.length > 0
      };
    } catch (error) {
      // Typically an invalid pattern or invalid flags reported by the RegExp constructor.
      throw new Error(
        `Failed to extract regex matches: ${error instanceof Error ? error.message : String(error)}`
      );
    }
  }
});
// Alias used for the module's default export.
var index_default = regexExtractTool;
|
|
68
|
+
|
|
69
|
+
export { index_default as default, regexExtractTool };
|
package/package.json
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@tpmjs/official-regex-extract",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Extract all regex matches from text with optional capture group support",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"tpmjs",
|
|
8
|
+
"text",
|
|
9
|
+
"regex",
|
|
10
|
+
"extract",
|
|
11
|
+
"pattern",
|
|
12
|
+
"matching"
|
|
13
|
+
],
|
|
14
|
+
"exports": {
|
|
15
|
+
".": {
|
|
16
|
+
"types": "./dist/index.d.ts",
|
|
17
|
+
"default": "./dist/index.js"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"files": [
|
|
21
|
+
"dist"
|
|
22
|
+
],
|
|
23
|
+
"devDependencies": {
|
|
24
|
+
"tsup": "^8.3.5",
|
|
25
|
+
"typescript": "^5.9.3",
|
|
26
|
+
"@tpmjs/tsconfig": "0.0.0"
|
|
27
|
+
},
|
|
28
|
+
"publishConfig": {
|
|
29
|
+
"access": "public"
|
|
30
|
+
},
|
|
31
|
+
"repository": {
|
|
32
|
+
"type": "git",
|
|
33
|
+
"url": "https://github.com/anthropics/tpmjs.git",
|
|
34
|
+
"directory": "packages/tools/official/regex-extract"
|
|
35
|
+
},
|
|
36
|
+
"homepage": "https://tpmjs.com",
|
|
37
|
+
"license": "MIT",
|
|
38
|
+
"tpmjs": {
|
|
39
|
+
"category": "text",
|
|
40
|
+
"frameworks": [
|
|
41
|
+
"vercel-ai"
|
|
42
|
+
],
|
|
43
|
+
"tools": [
|
|
44
|
+
{
|
|
45
|
+
"name": "regexExtractTool",
|
|
46
|
+
"description": "Extract all regex matches from text with optional capture group support",
|
|
47
|
+
"parameters": [
|
|
48
|
+
{
|
|
49
|
+
"name": "text",
|
|
50
|
+
"type": "string",
|
|
51
|
+
"description": "The text to search",
|
|
52
|
+
"required": true
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"name": "pattern",
|
|
56
|
+
"type": "string",
|
|
57
|
+
"description": "Regular expression pattern",
|
|
58
|
+
"required": true
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
"name": "flags",
|
|
62
|
+
"type": "string",
|
|
63
|
+
"description": "Regular expression flags (g, i, m, s, u, y)",
|
|
64
|
+
"required": false
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
"name": "groups",
|
|
68
|
+
"type": "boolean",
|
|
69
|
+
"description": "Return capture groups as objects",
|
|
70
|
+
"required": false
|
|
71
|
+
}
|
|
72
|
+
],
|
|
73
|
+
"returns": {
|
|
74
|
+
"type": "RegexExtractResult",
|
|
75
|
+
"description": "Object with matches array, match count, and hasMatches flag"
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
]
|
|
79
|
+
},
|
|
80
|
+
"dependencies": {
|
|
81
|
+
"ai": "6.0.0-beta.124"
|
|
82
|
+
},
|
|
83
|
+
"scripts": {
|
|
84
|
+
"build": "tsup",
|
|
85
|
+
"dev": "tsup --watch",
|
|
86
|
+
"type-check": "tsc --noEmit",
|
|
87
|
+
"clean": "rm -rf dist .turbo"
|
|
88
|
+
}
|
|
89
|
+
}
|