@peac/mappings-content-signals 0.12.4 → 0.12.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -41
- package/package.json +18 -5
package/README.md
CHANGED
|
@@ -1,82 +1,122 @@
|
|
|
1
1
|
# @peac/mappings-content-signals
|
|
2
2
|
|
|
3
|
-
Content use policy signal parsing for PEAC
|
|
3
|
+
Content use policy signal parsing for PEAC: maps robots.txt, tdmrep.json, and Content-Usage (AIPREF) headers to observable content signals with source precedence resolution.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
## Installation
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
| ------------- | ------------------------------------------------------------------------------------- | -------- |
|
|
11
|
-
| tdmrep.json | EU Directive 2019/790, Art. 4 | Highest |
|
|
12
|
-
| Content-Usage | AIPREF attach draft (draft-ietf-aipref-attach-04), vocab (draft-ietf-aipref-vocab-05) | 2 |
|
|
13
|
-
| robots.txt | RFC 9309 | Lowest |
|
|
7
|
+
```bash
|
|
8
|
+
pnpm add @peac/mappings-content-signals
|
|
9
|
+
```
|
|
14
10
|
|
|
15
|
-
|
|
11
|
+
## What It Does
|
|
16
12
|
|
|
17
|
-
|
|
13
|
+
`@peac/mappings-content-signals` parses content use policy signals from multiple sources and resolves them using a defined source precedence chain: `tdmrep.json` > `Content-Usage` header (AIPREF) > `robots.txt`. Signals record observations, never enforce; every signal has a three-state decision of `allow`, `deny`, or `unspecified`. All parsers receive pre-fetched content and perform no network I/O.
|
|
18
14
|
|
|
19
|
-
|
|
15
|
+
## How Do I Use It?
|
|
20
16
|
|
|
21
|
-
|
|
17
|
+
### Create a complete observation from all available sources
|
|
22
18
|
|
|
23
19
|
```typescript
|
|
24
20
|
import { createObservation } from '@peac/mappings-content-signals';
|
|
25
21
|
|
|
26
22
|
const observation = createObservation({
|
|
27
23
|
target_uri: 'https://example.com/article',
|
|
28
|
-
robots_txt:
|
|
29
|
-
tdmrep_json: tdmrepContent,
|
|
24
|
+
robots_txt: 'User-agent: GPTBot\nDisallow: /',
|
|
30
25
|
content_usage: 'train-ai=n, search=y',
|
|
26
|
+
tdmrep_json: JSON.stringify({ 'tdm-reservation': 0 }),
|
|
31
27
|
});
|
|
32
28
|
|
|
33
29
|
for (const signal of observation.signals) {
|
|
34
30
|
console.log(`${signal.purpose}: ${signal.decision} (from ${signal.source})`);
|
|
35
31
|
}
|
|
32
|
+
|
|
33
|
+
console.log(observation.sources_checked);
|
|
34
|
+
// ['tdmrep-json', 'content-usage-header', 'robots-txt']
|
|
36
35
|
```
|
|
37
36
|
|
|
38
|
-
###
|
|
37
|
+
### Parse robots.txt for AI-relevant signals
|
|
39
38
|
|
|
40
|
-
|
|
39
|
+
```typescript
|
|
40
|
+
import { parseRobotsTxt } from '@peac/mappings-content-signals';
|
|
41
|
+
|
|
42
|
+
const entries = parseRobotsTxt('User-agent: GPTBot\nDisallow: /\n');
|
|
43
|
+
for (const entry of entries) {
|
|
44
|
+
console.log(entry.purpose, entry.decision);
|
|
45
|
+
// 'ai-training' 'deny'
|
|
46
|
+
// 'ai-inference' 'deny'
|
|
47
|
+
}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Parse a Content-Usage header (AIPREF)
|
|
41
51
|
|
|
42
52
|
```typescript
|
|
43
53
|
import { parseContentUsage } from '@peac/mappings-content-signals';
|
|
44
54
|
|
|
45
|
-
const result = parseContentUsage('train-ai=n, search=y
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
55
|
+
const result = parseContentUsage('train-ai=n, search=y');
|
|
56
|
+
for (const entry of result.entries) {
|
|
57
|
+
console.log(entry.purpose, entry.decision);
|
|
58
|
+
// 'ai-training' 'deny'
|
|
59
|
+
// 'ai-search' 'allow'
|
|
60
|
+
}
|
|
61
|
+
// Unrecognized keys are preserved as extensions
|
|
62
|
+
console.log(result.extensions);
|
|
51
63
|
```
|
|
52
64
|
|
|
53
|
-
|
|
65
|
+
### Resolve signals from multiple sources using precedence
|
|
66
|
+
|
|
67
|
+
```typescript
|
|
68
|
+
import {
|
|
69
|
+
parseRobotsTxt,
|
|
70
|
+
parseContentUsage,
|
|
71
|
+
resolveSignals,
|
|
72
|
+
getDecisionForPurpose,
|
|
73
|
+
} from '@peac/mappings-content-signals';
|
|
74
|
+
|
|
75
|
+
const robotsEntries = parseRobotsTxt('User-agent: GPTBot\nDisallow: /');
|
|
76
|
+
const aiprefResult = parseContentUsage('train-ai=y');
|
|
54
77
|
|
|
55
|
-
|
|
78
|
+
const resolved = resolveSignals([...robotsEntries, ...aiprefResult.entries]);
|
|
56
79
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
| `train-genai` | `ai-generative` | Leaf | `y`=allow, `n`=deny |
|
|
62
|
-
| `search` | `ai-search` | Leaf | `y`=allow, `n`=deny |
|
|
80
|
+
// Content-Usage has higher precedence than robots.txt
|
|
81
|
+
const decision = getDecisionForPurpose(resolved, 'ai-training');
|
|
82
|
+
console.log(decision); // 'allow' (Content-Usage wins over robots.txt)
|
|
83
|
+
```
|
|
63
84
|
|
|
64
|
-
|
|
85
|
+
### Parse tdmrep.json for EU TDM reservation signals
|
|
65
86
|
|
|
66
|
-
|
|
87
|
+
```typescript
|
|
88
|
+
import { parseTdmrep } from '@peac/mappings-content-signals';
|
|
67
89
|
|
|
68
|
-
|
|
90
|
+
const entries = parseTdmrep(JSON.stringify({ 'tdm-reservation': 1 }));
|
|
91
|
+
for (const entry of entries) {
|
|
92
|
+
console.log(entry.purpose, entry.decision);
|
|
93
|
+
// 'tdm' 'deny'
|
|
94
|
+
// 'ai-training' 'deny'
|
|
95
|
+
}
|
|
96
|
+
```
|
|
69
97
|
|
|
70
|
-
|
|
98
|
+
## Integrates With
|
|
71
99
|
|
|
72
|
-
|
|
100
|
+
- `@peac/kernel` (Layer 0): Content signal types and purpose tokens
|
|
101
|
+
- `@peac/schema` (Layer 1): Signal schema validation
|
|
102
|
+
- `@peac/protocol` (Layer 3): Receipt issuance with content signal evidence
|
|
73
103
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
104
|
+
## For Agent Developers
|
|
105
|
+
|
|
106
|
+
If you are building an AI agent that needs to observe and record content use policies:
|
|
107
|
+
|
|
108
|
+
- Use `createObservation()` for a one-call workflow that parses all available sources and resolves precedence automatically
|
|
109
|
+
- Use individual parsers (`parseRobotsTxt`, `parseTdmrep`, `parseContentUsage`) when you need fine-grained control
|
|
110
|
+
- Use `resolveSignals()` to combine entries from multiple sources; the highest-priority source with a definitive signal wins
|
|
111
|
+
- Use `getDecisionForPurpose()` to query the resolved decision for a specific purpose such as `ai-training` or `ai-search`
|
|
112
|
+
- All functions accept pre-fetched content; the package performs no network I/O
|
|
79
113
|
|
|
80
114
|
## License
|
|
81
115
|
|
|
82
116
|
Apache-2.0
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
PEAC Protocol is an open source project stewarded by Originary and community contributors.
|
|
121
|
+
|
|
122
|
+
[Docs](https://www.peacprotocol.org) | [GitHub](https://github.com/peacprotocol/peac) | [Originary](https://www.originary.xyz)
|
package/package.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@peac/mappings-content-signals",
|
|
3
|
-
"version": "0.12.4",
|
|
3
|
+
"version": "0.12.6",
|
|
4
4
|
"description": "Content use policy signal parsing for PEAC (robots.txt, tdmrep.json, Content-Usage)",
|
|
5
5
|
"main": "dist/index.cjs",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
9
|
-
"url": "https://github.com/peacprotocol/peac.git",
|
|
9
|
+
"url": "git+https://github.com/peacprotocol/peac.git",
|
|
10
10
|
"directory": "packages/mappings/content-signals"
|
|
11
11
|
},
|
|
12
|
-
"author": "
|
|
12
|
+
"author": "PEAC Protocol Contributors",
|
|
13
13
|
"license": "Apache-2.0",
|
|
14
14
|
"bugs": {
|
|
15
15
|
"url": "https://github.com/peacprotocol/peac/issues"
|
|
@@ -23,8 +23,8 @@
|
|
|
23
23
|
"access": "public"
|
|
24
24
|
},
|
|
25
25
|
"dependencies": {
|
|
26
|
-
"@peac/kernel": "0.12.
|
|
27
|
-
"@peac/schema": "0.12.
|
|
26
|
+
"@peac/kernel": "0.12.6",
|
|
27
|
+
"@peac/schema": "0.12.6"
|
|
28
28
|
},
|
|
29
29
|
"devDependencies": {
|
|
30
30
|
"@types/node": "^22.19.11",
|
|
@@ -41,6 +41,19 @@
|
|
|
41
41
|
},
|
|
42
42
|
"./package.json": "./package.json"
|
|
43
43
|
},
|
|
44
|
+
"keywords": [
|
|
45
|
+
"peac",
|
|
46
|
+
"peacprotocol",
|
|
47
|
+
"interaction-records",
|
|
48
|
+
"signed-records",
|
|
49
|
+
"receipts",
|
|
50
|
+
"originary",
|
|
51
|
+
"content-signals",
|
|
52
|
+
"aipref",
|
|
53
|
+
"robots-txt",
|
|
54
|
+
"tdmrep",
|
|
55
|
+
"ai-crawling"
|
|
56
|
+
],
|
|
44
57
|
"scripts": {
|
|
45
58
|
"prebuild": "rm -rf dist",
|
|
46
59
|
"build": "pnpm run build:js && pnpm run build:types",
|