@tricoteuses/tisseuse 0.1.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,17 +8,17 @@ export { compoundReferencesSeparators, divisionTypes, europeanLawNatures, french
8
8
  export { citation, citationLigne, citationSimple, } from './text_parsers/citations.js';
9
9
  export { date, duDate } from './text_parsers/dates.js';
10
10
  export { definitionDivision, designationDivision, division, divisions, natureDivisionSingulier, numeroDivision, } from './text_parsers/divisions.js';
11
+ export type { FragmentPosition, FragmentReverseTransformation, } from './text_parsers/fragments.js';
11
12
  export { addChildLeftToLastChild, createEnumerationOrBoundedInterval, createParentChildTreeFromReferences, iterAtomicFirstParentReferences, iterAtomicReferences, iterIncludedReferences, } from './text_parsers/helpers.js';
12
- export { getReferences, iterCitationReferences, iterReferences, } from './text_parsers/index.js';
13
+ export { getParsedReferences, getParsedReferencesWithOriginalTransformations, parseCitationReferences, parseReferences, parseReferencesWithOriginalTransformations, } from './text_parsers/index.js';
13
14
  export { adjectifNumeralOrdinalCourt, adverbeMultiplicatifLatin, nombreAsTextAstNumber, nombreCardinal, nombreRomainCardinal, nombreRomainOrdinal, nombreRomainOu0iAsTextAstNumber, } from './text_parsers/numbers.js';
14
15
  export { alternatives, chain, convert, optional, parseText, regExp, repeat, TextParserContext, variable, wordsTree, } from './text_parsers/parsers.js';
15
16
  export { auPortion, auxPortions, numeroPortion, portions, unePortion, } from './text_parsers/portions.js';
16
- export type { TextPosition } from './text_parsers/positions.js';
17
17
  export { ditPluriel, ditSingulier, introPluriel, introSingulier, liaisonPluriel, liaisonSingulier, } from './text_parsers/prepositions.js';
18
18
  export { reference, uniteBasePreciseeSingulier, uniteBaseSingulier, } from './text_parsers/references.js';
19
19
  export { adverbeRelatif, espaceAdverbeRelatif, relatifPlurielPrepose, relatifSingulierPrepose, } from './text_parsers/relative_locations.js';
20
20
  export { convertHtmlElementsToText, decodeNamedHtmlEntities, decodeNumericHtmlEntities, replacePattern, replacePatterns, simplifyHtml, simplifyText, simplifyUnicodeCharacters, } from './text_parsers/simplifiers.js';
21
21
  export { definitionTexteFrancais, identificationTexteEuropeen, natureTexteFrancais, numeroEtOuDateTexteFrancais, numeroTexteEuropeen, numeroTexteFrancais, optionalEspaceDuTerritoire, texte, texteEuropeen, texteFrancais, texteInternational, } from './text_parsers/texts.js';
22
- export { chainTransformers, iterOriginalMergedPositionsFromTransformed, originalMergedPositionsFromTransformed, originalSplitPositionsFromTransformed, type FragmentReverseTransformation, type SourceMapSegment, type Transformation, type TransformationLeaf, type TransformationNode, type Transformer, type TransformerLeaf, type TransformerNode, } from './text_parsers/transformers.js';
22
+ export { chainTransformers, iterOriginalMergedPositionsFromTransformed, originalMergedPositionsFromTransformed, originalSplitPositionsFromTransformed, reverseTransformedInnerFragment, reverseTransformedReplacement, type SourceMapSegment, type Transformation, type TransformationLeaf, type TransformationNode, type Transformer, type TransformerLeaf, type TransformerNode, } from './text_parsers/transformers.js';
23
23
  export { espace, lettreAsciiMinuscule, nonLettre, numero, virguleOuEspace, } from './text_parsers/typography.js';
24
24
  export { cleanTexteTitle } from './textes.js';
@@ -1,9 +1,11 @@
1
1
  import { TextParserContext } from '../text_parsers/parsers.js';
2
2
  import { DefinitionOrLink } from '../text_parsers/text_links.js';
3
- export declare function iterTextLinks(context: TextParserContext, { date, defaultTextId, logIgnoredReferencesTypes, logPartialReferences, logReferences, }: {
3
+ import { Transformation } from '../text_parsers/transformers.js';
4
+ export declare function iterTextLinks(context: TextParserContext, { date, defaultTextId, logIgnoredReferencesTypes, logPartialReferences, logReferences, transformation, }: {
4
5
  date: string;
5
6
  defaultTextId?: string;
6
7
  logIgnoredReferencesTypes?: boolean;
7
8
  logPartialReferences?: boolean;
8
9
  logReferences?: boolean;
10
+ transformation?: Transformation;
9
11
  }): AsyncGenerator<DefinitionOrLink, void>;
@@ -1,4 +1,4 @@
1
- import { TextPosition } from './positions.js';
1
+ import { FragmentPosition, FragmentReverseTransformation } from './fragments.js';
2
2
  export type CompoundReferencesSeparator = (typeof compoundReferencesSeparators)[number];
3
3
  export type DivisionType = (typeof divisionTypes)[number];
4
4
  export type EuropeanLawNature = (typeof europeanLawNatures)[number];
@@ -28,6 +28,7 @@ export type TextAstBoundedInterval = {
28
28
  } & TextAstPosition;
29
29
  export type TextAstCitation = {
30
30
  content: TextAstPosition[];
31
+ references?: TextAstReference[];
31
32
  type: "citation";
32
33
  } & TextAstPosition;
33
34
  export type TextAstCompoundReference = TextAstBoundedInterval | TextAstCountedInterval | TextAstEnumeration | TextAstExclusion;
@@ -89,7 +90,8 @@ export type TextAstPortion = {
89
90
  type: PortionType;
90
91
  } & TextAstLocalization & TextAstPosition;
91
92
  export interface TextAstPosition {
92
- position: TextPosition;
93
+ originalTransformation?: FragmentReverseTransformation;
94
+ position: FragmentPosition;
93
95
  }
94
96
  export type TextAstReference = TextAstAtomicReference | TextAstCompoundReference | TextAstParentChild | TextAstReferenceAndAction;
95
97
  export type TextAstReferenceAndAction = {
@@ -0,0 +1,11 @@
1
+ export interface FragmentPosition {
2
+ start: number;
3
+ stop: number;
4
+ }
5
+ export interface FragmentReverseTransformation {
6
+ innerPrefix?: string;
7
+ innerSuffix?: string;
8
+ outerPrefix?: string;
9
+ outerSuffix?: string;
10
+ position: FragmentPosition;
11
+ }
@@ -1,11 +1,13 @@
1
1
  import { CompoundReferencesSeparator, TextAstAtomicReference, TextAstParentChild, TextAstReference } from './ast.js';
2
- import { TextPosition } from './positions.js';
2
+ import { FragmentPosition } from './fragments.js';
3
3
  export declare const addChildLeftToLastChild: (reference: TextAstReference, child: TextAstReference) => TextAstReference;
4
- export declare const createEnumerationOrBoundedInterval: (reference: TextAstReference, remaining: Array<[CompoundReferencesSeparator, TextAstReference]>, position: TextPosition) => TextAstReference;
5
- export declare const createEnumerationOrBoundedInterval1: (reference: TextAstReference, remaining: Array<[CompoundReferencesSeparator, TextAstReference]>, position: TextPosition, remainingIndex: number) => TextAstReference;
6
- export declare const createParentChildTreeFromReferences: (child: TextAstReference, ancestors: TextAstAtomicReference[], position: TextPosition) => TextAstReference;
4
+ export declare const createEnumerationOrBoundedInterval: (reference: TextAstReference, remaining: Array<[CompoundReferencesSeparator, TextAstReference]>, position: FragmentPosition) => TextAstReference;
5
+ export declare const createEnumerationOrBoundedInterval1: (reference: TextAstReference, remaining: Array<[CompoundReferencesSeparator, TextAstReference]>, position: FragmentPosition, remainingIndex: number) => TextAstReference;
6
+ export declare const createParentChildTreeFromReferences: (child: TextAstReference, ancestors: TextAstAtomicReference[], position: FragmentPosition) => TextAstReference;
7
7
  export declare const getHighestAtomicType: (type1: TextAstAtomicReference["type"], type2: TextAstAtomicReference["type"]) => TextAstAtomicReference["type"];
8
8
  export declare const getReferenceHighestAtomicType: (reference: TextAstReference) => TextAstAtomicReference["type"];
9
9
  export declare function iterAtomicFirstParentReferences<T extends TextAstAtomicReference | TextAstParentChild>(reference: TextAstReference): Generator<T, void>;
10
10
  export declare function iterAtomicReferences(reference: TextAstReference): Generator<TextAstAtomicReference, void>;
11
- export declare function iterIncludedReferences(reference: TextAstReference): Generator<TextAstReference, void>;
11
+ export declare function iterIncludedReferences(reference: TextAstReference, { citations }?: {
12
+ citations?: boolean;
13
+ }): Generator<TextAstReference, void>;
@@ -1,5 +1,8 @@
1
1
  import { TextAstCitation, TextAstReference } from './ast.js';
2
2
  import { TextParserContext } from './parsers.js';
3
- export declare function iterCitationReferences(context: TextParserContext, citation: TextAstCitation): Generator<TextAstReference, void>;
4
- export declare function iterReferences(context: TextParserContext): Generator<TextAstReference, void>;
5
- export declare const getReferences: (context: TextParserContext) => TextAstReference[];
3
+ import { Transformation } from './transformers.js';
4
+ export declare function parseCitationReferences(context: TextParserContext, citation: TextAstCitation): Generator<TextAstReference, void>;
5
+ export declare function parseReferences(context: TextParserContext): Generator<TextAstReference, void>;
6
+ export declare function parseReferencesWithOriginalTransformations(context: TextParserContext, transformation: Transformation): Generator<TextAstReference, void>;
7
+ export declare const getParsedReferences: (context: TextParserContext) => TextAstReference[];
8
+ export declare const getParsedReferencesWithOriginalTransformations: (context: TextParserContext, transformation: Transformation) => TextAstReference[];
@@ -1,5 +1,5 @@
1
1
  import { TextAst, TextAstArticle, TextAstText, TextAstTextInfos, TextInfosByWordsTree } from './ast.js';
2
- import { TextPosition } from './positions.js';
2
+ import { FragmentPosition } from './fragments.js';
3
3
  export type TextInfosConverter = (infos: TextAstTextInfos, context: TextParserContext) => TextAst | undefined;
4
4
  export type RegExpConverter = (match: RegExpExecArray, context: TextParserContext) => TextAst | undefined;
5
5
  export type TextAstConverter<T extends TextAst> = (ast: T, context: TextParserContext) => TextAst | undefined;
@@ -13,9 +13,9 @@ export declare class TextParserContext {
13
13
  usedInputs: TextTree | undefined;
14
14
  variables: Record<string, TextAst>;
15
15
  constructor(input: string, offset?: number);
16
- position(): TextPosition;
16
+ position(): FragmentPosition;
17
17
  remaining(): string;
18
- text(position?: TextPosition): string;
18
+ text(position?: FragmentPosition): string;
19
19
  textFromResults(results: TextAst[] | undefined): string;
20
20
  }
21
21
  type TextTree = string | null | Array<TextTree>;
@@ -1,12 +1,19 @@
1
1
  import { TextAstArticle, TextAstDivision, TextAstPosition, TextAstReference, TextAstText } from './ast.js';
2
- import { TextPosition } from './positions.js';
2
+ import { FragmentPosition, FragmentReverseTransformation } from './fragments.js';
3
3
  export type DefinitionOrLink = ArticleDefinition | ArticleLink | DivisionLink | TextLink;
4
4
  export interface ArticleDefinition {
5
5
  article: TextAstArticle;
6
+ /**
7
+ * Same value as article.originalTransformation, added for homogeneity
8
+ *
9
+ * Only defined when a transformation was used to convert input text
10
+ * to simplified text.
11
+ */
12
+ originalTransformation?: FragmentReverseTransformation;
6
13
  /**
7
14
  * Same value as article.position, added for homogeneity
8
15
  */
9
- position: TextPosition;
16
+ position: FragmentPosition;
10
17
  reference: TextAstReference;
11
18
  textId: string;
12
19
  type: "article_definition";
@@ -14,21 +21,36 @@ export interface ArticleDefinition {
14
21
  export interface ArticleExternalLink {
15
22
  article: TextAstArticle;
16
23
  articleId?: string;
17
- position: TextPosition;
24
+ /**
25
+ * Only defined when a transformation was used to convert input text
26
+ * to simplified text.
27
+ */
28
+ originalTransformation?: FragmentReverseTransformation;
29
+ position: FragmentPosition;
18
30
  reference: TextAstReference;
19
31
  type: "external_article";
20
32
  }
21
33
  export interface ArticleInternalLink {
22
34
  article: TextAstArticle;
23
35
  definition: ArticleDefinition;
24
- position: TextPosition;
36
+ /**
37
+ * Only defined when a transformation was used to convert input text
38
+ * to simplified text.
39
+ */
40
+ originalTransformation?: FragmentReverseTransformation;
41
+ position: FragmentPosition;
25
42
  reference: TextAstReference;
26
43
  type: "internal_article";
27
44
  }
28
45
  export type ArticleLink = ArticleExternalLink | ArticleInternalLink;
29
46
  export interface DivisionExternalLink {
30
47
  division: TextAstDivision;
31
- position: TextPosition;
48
+ /**
49
+ * Only defined when a transformation was used to convert input text
50
+ * to simplified text.
51
+ */
52
+ originalTransformation?: FragmentReverseTransformation;
53
+ position: FragmentPosition;
32
54
  reference: TextAstReference;
33
55
  sectionTaId?: string;
34
56
  type: "external_division";
@@ -42,7 +64,12 @@ export interface ExtractedLinkDb {
42
64
  target_id: string | null;
43
65
  }
44
66
  export interface TextExternalLink {
45
- position: TextPosition;
67
+ /**
68
+ * Only defined when a transformation was used to convert input text
69
+ * to simplified text.
70
+ */
71
+ originalTransformation?: FragmentReverseTransformation;
72
+ position: FragmentPosition;
46
73
  reference: TextAstReference;
47
74
  text: TextAstText & TextAstPosition;
48
75
  type: "external_text";
@@ -1,11 +1,4 @@
1
- import { TextPosition } from './positions.js';
2
- export interface FragmentReverseTransformation {
3
- innerPrefix?: string;
4
- innerSuffix?: string;
5
- outerPrefix?: string;
6
- outerSuffix?: string;
7
- position: TextPosition;
8
- }
1
+ import { FragmentPosition, FragmentReverseTransformation } from './fragments.js';
9
2
  export interface SourceMapSegment {
10
3
  inputIndex: number;
11
4
  inputLength: number;
@@ -39,15 +32,37 @@ export type TransformerNode = (text: string) => TransformationNode;
39
32
  export declare const tagRegExp: RegExp;
40
33
  export declare function chainTransformers(title: string, transformers: Array<Transformer>): TransformerNode;
41
34
  /**
42
- * Caution: This iterator fails when successive original positions overlap.
35
+ * Generator that converts transformed (e.g. simplified) positions to original
36
+ * (e.g. HTML) positions
37
+ *
38
+ * When a position is included in fragments of HTML elements, the elements are
39
+ * either split (for spans) or the position is enlarged to include the whole
40
+ * elements (for blocks).
41
+ *
42
+ * Use this generator to insert HTML elements (links, spans, etc).
43
43
  */
44
- export declare function iterOriginalMergedPositionsFromTransformed(transformation: Transformation): Generator<FragmentReverseTransformation, void, TextPosition | undefined>;
44
+ export declare function iterOriginalMergedPositionsFromTransformed(transformation: Transformation): Generator<FragmentReverseTransformation, void, FragmentPosition | undefined>;
45
45
  /**
46
- * Caution: This iterator fails when successive original positions overlap.
46
+ * Converts an array of transformed (e.g. simplified) positions to an array
47
+ * of original (e.g. HTML) positions
48
+ *
49
+ * When a position is included in fragments of HTML elements, the elements are
50
+ * either split (for spans) or the position is enlarged to include the whole
51
+ * elements (for blocks).
52
+ *
53
+ * Use this function to insert HTML elements (links, spans, etc).
47
54
  */
48
- export declare function originalMergedPositionsFromTransformed(transformation: Transformation, transformedPositions: TextPosition[]): FragmentReverseTransformation[];
55
+ export declare function originalMergedPositionsFromTransformed(transformation: Transformation, transformedPositions: FragmentPosition[]): FragmentReverseTransformation[];
49
56
  /**
50
- * Note: The original positions are split when they overlap.
51
- * So, there may be more original positions than transformed positions.
57
+ * Converts an array of transformed (e.g. simplified) positions to an array
58
+ * of original (e.g. HTML) positions
59
+ *
60
+ * Each position is split to ensure that it doesn't contain any HTML element.
61
+ *
62
+ * The positions must be sorted in ascending order.
63
+ *
64
+ * Use this function for diffs.
52
65
  */
53
- export declare function originalSplitPositionsFromTransformed(transformation: Transformation, positions: TextPosition[]): TextPosition[];
66
+ export declare function originalSplitPositionsFromTransformed(transformation: Transformation, positions: FragmentPosition[]): FragmentPosition[];
67
+ export declare const reverseTransformedInnerFragment: <StringOrUndefined extends string | undefined>(originalText: string, originalTransformation: FragmentReverseTransformation | undefined, offset?: number) => StringOrUndefined;
68
+ export declare const reverseTransformedReplacement: (originalTransformation: FragmentReverseTransformation, replacement: string) => string;
@@ -1 +0,0 @@
1
- export {};
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@tricoteuses/tisseuse",
3
3
  "description": "Find links in/to French legislative documents",
4
- "version": "0.1.8",
4
+ "version": "0.2.0",
5
5
  "keywords": [
6
6
  "Assemblée nationale",
7
7
  "France",
@@ -1,4 +0,0 @@
1
- export interface TextPosition {
2
- start: number;
3
- stop: number;
4
- }