@nuasite/cms-marker 0.0.79 → 0.0.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,24 @@ import { type HTMLElement as ParsedHTMLElement, parse } from 'node-html-parser'
2
2
  import { processSeoFromHtml } from './seo-processor'
3
3
  import { enhanceManifestWithSourceSnippets } from './source-finder'
4
4
  import { extractColorClasses } from './tailwind-colors'
5
- import type { ComponentInstance, ImageMetadata, ManifestEntry, PageSeoData, SeoOptions, SourceContext } from './types'
5
+ import type {
6
+ AriaAttributes,
7
+ ButtonAttributes,
8
+ ComponentInstance,
9
+ DataAttributes,
10
+ FormAttributes,
11
+ IframeAttributes,
12
+ ImageMetadata,
13
+ InputAttributes,
14
+ LinkAttributes,
15
+ ManifestEntry,
16
+ MediaAttributes,
17
+ PageSeoData,
18
+ SelectAttributes,
19
+ SeoOptions,
20
+ SourceContext,
21
+ TextareaAttributes,
22
+ } from './types'
6
23
  import { generateStableId } from './utils'
7
24
 
8
25
  /** Type for parsed HTML element nodes from node-html-parser */
@@ -719,6 +736,18 @@ export async function processHtml(
719
736
  const classAttr = node.getAttribute('class')
720
737
  const colorClasses = extractColorClasses(classAttr)
721
738
 
739
+ // Extract element-specific attributes for git diff tracking
740
+ const linkAttributes = extractLinkAttributes(node)
741
+ const buttonAttributes = extractButtonAttributes(node)
742
+ const inputAttributes = extractInputAttributes(node)
743
+ const formAttributes = extractFormAttributes(node)
744
+ const mediaAttributes = extractMediaAttributes(node)
745
+ const iframeAttributes = extractIframeAttributes(node)
746
+ const selectAttributes = extractSelectAttributes(node)
747
+ const textareaAttributes = extractTextareaAttributes(node)
748
+ const ariaAttributes = extractAriaAttributes(node)
749
+ const dataAttributes = extractDataAttributes(node)
750
+
722
751
  entries[id] = {
723
752
  id,
724
753
  tag,
@@ -742,6 +771,17 @@ export async function processHtml(
742
771
  imageMetadata: imageInfo?.metadata,
743
772
  // Color classes for buttons/styled elements
744
773
  colorClasses,
774
+ // Element-specific attributes for git diff tracking
775
+ linkAttributes,
776
+ buttonAttributes,
777
+ inputAttributes,
778
+ formAttributes,
779
+ mediaAttributes,
780
+ iframeAttributes,
781
+ selectAttributes,
782
+ textareaAttributes,
783
+ ariaAttributes,
784
+ dataAttributes,
745
785
  }
746
786
  })
747
787
  }
@@ -777,9 +817,9 @@ export async function processHtml(
777
817
  finalHtml = seoResult.html
778
818
 
779
819
  // If title was marked with CMS ID, add it to entries
780
- if (seoResult.titleCmsId && seo.title) {
781
- enhancedEntries[seoResult.titleCmsId] = {
782
- id: seoResult.titleCmsId,
820
+ if (seoResult.titleId && seo.title) {
821
+ enhancedEntries[seoResult.titleId] = {
822
+ id: seoResult.titleId,
783
823
  tag: 'title',
784
824
  text: seo.title.content,
785
825
  sourcePath: seo.title.sourcePath || sourcePath,
@@ -817,6 +857,425 @@ export function cleanText(text: string): string {
817
857
  return text.trim().replace(/\s+/g, ' ').toLowerCase()
818
858
  }
819
859
 
860
/**
 * Extract link attributes from an anchor element for git diff tracking.
 *
 * @param node - Parsed element to inspect.
 * @returns The anchor's `href` together with its optional `target`, `rel`,
 * `title` and `download` values, or `undefined` when the element is not an
 * `<a>` or has no non-empty `href`.
 */
function extractLinkAttributes(node: HTMLNode): LinkAttributes | undefined {
	const tag = node.tagName?.toLowerCase?.()
	if (tag !== 'a') return undefined

	const href = node.getAttribute('href')
	if (!href) return undefined

	// Presence is decided with hasAttribute() instead of comparing the value to
	// `null`: getAttribute() may report a missing attribute as `undefined`, in
	// which case a strict `!== null` check treats it as present and marks every
	// link as a download.
	// A valueless `download` attribute is recorded as `true`; otherwise the
	// attribute's string value is kept.
	const download = node.hasAttribute('download')
		? (node.getAttribute('download') || true)
		: undefined

	return {
		href,
		target: node.getAttribute('target') || undefined,
		rel: node.getAttribute('rel') || undefined,
		title: node.getAttribute('title') || undefined,
		download,
	}
}
881
+
882
/**
 * Collect the tracked attributes of a `<button>` element for git diff tracking.
 * Yields undefined for any other element, and also for a button on which none
 * of the tracked attributes is present with a usable value.
 */
function extractButtonAttributes(node: HTMLNode): ButtonAttributes | undefined {
	if (node.tagName?.toLowerCase?.() !== 'button') return undefined

	const collected: ButtonAttributes = {}

	const buttonType = node.getAttribute('type')
	if (buttonType) collected.type = buttonType

	// Boolean attribute: presence alone counts, whatever its value.
	if (node.hasAttribute('disabled')) collected.disabled = true

	const ownerForm = node.getAttribute('form')
	if (ownerForm) collected.form = ownerForm

	const submitUrl = node.getAttribute('formaction')
	if (submitUrl) collected.formAction = submitUrl

	const submitMethod = node.getAttribute('formmethod')
	if (submitMethod) collected.formMethod = submitMethod

	// Every assignment above adds a key, so an empty object means nothing was found.
	return Object.keys(collected).length > 0 ? collected : undefined
}
909
+
910
/**
 * Extract input attributes for git diff tracking.
 *
 * String attributes are recorded only when non-empty, boolean attributes
 * (`required`, `disabled`, `readonly`) when present, and the length limits
 * (`minlength`, `maxlength`) only when they parse as integers.
 *
 * @param node - Parsed element to inspect.
 * @returns The collected attributes, or `undefined` when the element is not an
 * `<input>` or none of the tracked attributes yielded a value.
 */
function extractInputAttributes(node: HTMLNode): InputAttributes | undefined {
	const tag = node.tagName?.toLowerCase?.()
	if (tag !== 'input') return undefined

	const result: InputAttributes = {}
	let hasValues = false

	// Reads an attribute as a base-10 integer. A non-numeric value would parse to
	// NaN, so it is reported as absent instead of being stored in the result.
	const readInteger = (attr: string): number | undefined => {
		const raw = node.getAttribute(attr)
		if (!raw) return undefined
		const parsed = parseInt(raw, 10)
		return Number.isNaN(parsed) ? undefined : parsed
	}

	const type = node.getAttribute('type')
	if (type) { result.type = type; hasValues = true }

	const name = node.getAttribute('name')
	if (name) { result.name = name; hasValues = true }

	const placeholder = node.getAttribute('placeholder')
	if (placeholder) { result.placeholder = placeholder; hasValues = true }

	if (node.hasAttribute('required')) { result.required = true; hasValues = true }

	const pattern = node.getAttribute('pattern')
	if (pattern) { result.pattern = pattern; hasValues = true }

	const inputMode = node.getAttribute('inputmode')
	if (inputMode) { result.inputMode = inputMode; hasValues = true }

	const autoComplete = node.getAttribute('autocomplete')
	if (autoComplete) { result.autoComplete = autoComplete; hasValues = true }

	if (node.hasAttribute('disabled')) { result.disabled = true; hasValues = true }

	if (node.hasAttribute('readonly')) { result.readOnly = true; hasValues = true }

	// min/max/step stay strings: they are copied verbatim, not parsed.
	const min = node.getAttribute('min')
	if (min) { result.min = min; hasValues = true }

	const max = node.getAttribute('max')
	if (max) { result.max = max; hasValues = true }

	const step = node.getAttribute('step')
	if (step) { result.step = step; hasValues = true }

	const minLength = readInteger('minlength')
	if (minLength !== undefined) { result.minLength = minLength; hasValues = true }

	const maxLength = readInteger('maxlength')
	if (maxLength !== undefined) { result.maxLength = maxLength; hasValues = true }

	return hasValues ? result : undefined
}
962
+
963
/**
 * Gather the tracked attributes of a `<form>` element for git diff tracking.
 * Gives back undefined for non-form elements, and for forms on which none of
 * the tracked attributes is present with a usable value.
 */
function extractFormAttributes(node: HTMLNode): FormAttributes | undefined {
	if (node.tagName?.toLowerCase?.() !== 'form') return undefined

	const collected: FormAttributes = {}

	const actionUrl = node.getAttribute('action')
	if (actionUrl) collected.action = actionUrl

	const httpMethod = node.getAttribute('method')
	if (httpMethod) collected.method = httpMethod

	const encoding = node.getAttribute('enctype')
	if (encoding) collected.encType = encoding

	// Boolean attribute: presence alone counts, whatever its value.
	if (node.hasAttribute('novalidate')) collected.noValidate = true

	const targetName = node.getAttribute('target')
	if (targetName) collected.target = targetName

	const formName = node.getAttribute('name')
	if (formName) collected.name = formName

	// Every assignment above adds a key, so an empty object means nothing was found.
	return Object.keys(collected).length > 0 ? collected : undefined
}
993
+
994
/**
 * Gather the tracked attributes of a `<video>` or `<audio>` element for git
 * diff tracking.
 * Gives back undefined for any other element, and for media elements on which
 * none of the tracked attributes is present with a usable value.
 */
function extractMediaAttributes(node: HTMLNode): MediaAttributes | undefined {
	const tagName = node.tagName?.toLowerCase?.()
	const isMedia = tagName === 'video' || tagName === 'audio'
	if (!isMedia) return undefined

	const collected: MediaAttributes = {}

	const source = node.getAttribute('src')
	if (source) collected.src = source

	const posterUrl = node.getAttribute('poster')
	if (posterUrl) collected.poster = posterUrl

	// Boolean attributes: presence alone counts, whatever the value.
	if (node.hasAttribute('controls')) collected.controls = true
	if (node.hasAttribute('autoplay')) collected.autoplay = true
	if (node.hasAttribute('muted')) collected.muted = true
	if (node.hasAttribute('loop')) collected.loop = true
	if (node.hasAttribute('playsinline')) collected.playsInline = true

	const preloadHint = node.getAttribute('preload')
	if (preloadHint) collected.preload = preloadHint

	// Every assignment above adds a key, so an empty object means nothing was found.
	return Object.keys(collected).length > 0 ? collected : undefined
}
1026
+
1027
/**
 * Extract iframe attributes for git diff tracking.
 *
 * String attributes are recorded only when non-empty. `sandbox` is the
 * exception: it is recorded whenever the attribute is present, using an empty
 * string when it has no value.
 *
 * @param node - Parsed element to inspect.
 * @returns The collected attributes, or `undefined` when the element is not an
 * `<iframe>` or none of the tracked attributes yielded a value.
 */
function extractIframeAttributes(node: HTMLNode): IframeAttributes | undefined {
	const tag = node.tagName?.toLowerCase?.()
	if (tag !== 'iframe') return undefined

	const result: IframeAttributes = {}
	let hasValues = false

	const src = node.getAttribute('src')
	if (src) { result.src = src; hasValues = true }

	const title = node.getAttribute('title')
	if (title) { result.title = title; hasValues = true }

	const allow = node.getAttribute('allow')
	if (allow) { result.allow = allow; hasValues = true }

	// Presence is decided with hasAttribute() instead of comparing the value to
	// `null`: getAttribute() may report a missing attribute as `undefined`, in
	// which case a strict `!== null` check records `sandbox: ''` on every iframe
	// and prevents the function from ever returning undefined.
	if (node.hasAttribute('sandbox')) {
		result.sandbox = node.getAttribute('sandbox') || ''
		hasValues = true
	}

	const loading = node.getAttribute('loading')
	if (loading) { result.loading = loading; hasValues = true }

	const width = node.getAttribute('width')
	if (width) { result.width = width; hasValues = true }

	const height = node.getAttribute('height')
	if (height) { result.height = height; hasValues = true }

	const name = node.getAttribute('name')
	if (name) { result.name = name; hasValues = true }

	return hasValues ? result : undefined
}
1064
+
1065
/**
 * Extract select attributes for git diff tracking.
 *
 * `name` is recorded when non-empty, the boolean attributes (`multiple`,
 * `required`, `disabled`) when present, and `size` only when it parses as an
 * integer.
 *
 * @param node - Parsed element to inspect.
 * @returns The collected attributes, or `undefined` when the element is not a
 * `<select>` or none of the tracked attributes yielded a value.
 */
function extractSelectAttributes(node: HTMLNode): SelectAttributes | undefined {
	const tag = node.tagName?.toLowerCase?.()
	if (tag !== 'select') return undefined

	const result: SelectAttributes = {}
	let hasValues = false

	const name = node.getAttribute('name')
	if (name) { result.name = name; hasValues = true }

	if (node.hasAttribute('multiple')) { result.multiple = true; hasValues = true }

	if (node.hasAttribute('required')) { result.required = true; hasValues = true }

	if (node.hasAttribute('disabled')) { result.disabled = true; hasValues = true }

	const size = node.getAttribute('size')
	if (size) {
		const parsedSize = parseInt(size, 10)
		// A non-numeric size parses to NaN; skip it rather than store NaN.
		if (!Number.isNaN(parsedSize)) { result.size = parsedSize; hasValues = true }
	}

	return hasValues ? result : undefined
}
1090
+
1091
/**
 * Extract textarea attributes for git diff tracking.
 *
 * String attributes are recorded only when non-empty, boolean attributes
 * (`required`, `disabled`, `readonly`) when present, and the numeric ones
 * (`rows`, `cols`, `minlength`, `maxlength`) only when they parse as integers.
 *
 * @param node - Parsed element to inspect.
 * @returns The collected attributes, or `undefined` when the element is not a
 * `<textarea>` or none of the tracked attributes yielded a value.
 */
function extractTextareaAttributes(node: HTMLNode): TextareaAttributes | undefined {
	const tag = node.tagName?.toLowerCase?.()
	if (tag !== 'textarea') return undefined

	const result: TextareaAttributes = {}
	let hasValues = false

	// Reads an attribute as a base-10 integer. A non-numeric value would parse to
	// NaN, so it is reported as absent instead of being stored in the result.
	const readInteger = (attr: string): number | undefined => {
		const raw = node.getAttribute(attr)
		if (!raw) return undefined
		const parsed = parseInt(raw, 10)
		return Number.isNaN(parsed) ? undefined : parsed
	}

	const name = node.getAttribute('name')
	if (name) { result.name = name; hasValues = true }

	const placeholder = node.getAttribute('placeholder')
	if (placeholder) { result.placeholder = placeholder; hasValues = true }

	if (node.hasAttribute('required')) { result.required = true; hasValues = true }

	if (node.hasAttribute('disabled')) { result.disabled = true; hasValues = true }

	if (node.hasAttribute('readonly')) { result.readOnly = true; hasValues = true }

	const rows = readInteger('rows')
	if (rows !== undefined) { result.rows = rows; hasValues = true }

	const cols = readInteger('cols')
	if (cols !== undefined) { result.cols = cols; hasValues = true }

	const minLength = readInteger('minlength')
	if (minLength !== undefined) { result.minLength = minLength; hasValues = true }

	const maxLength = readInteger('maxlength')
	if (maxLength !== undefined) { result.maxLength = maxLength; hasValues = true }

	const wrap = node.getAttribute('wrap')
	if (wrap) { result.wrap = wrap; hasValues = true }

	return hasValues ? result : undefined
}
1131
+
1132
/**
 * Collect `role` and the tracked `aria-*` attributes of an element for git
 * diff tracking. Applies to every tag.
 *
 * Free-text attributes are copied as strings. True/false states become
 * booleans, where only the literal value "true" maps to `true`. Attributes
 * with extra keyword values (`aria-pressed`, `aria-invalid`, `aria-haspopup`)
 * keep a recognised keyword and otherwise fall back to that boolean rule.
 * `aria-live` is recorded only for "polite", "assertive" or "off".
 *
 * Yields undefined when none of these attributes produced a value.
 */
function extractAriaAttributes(node: HTMLNode): AriaAttributes | undefined {
	const aria: AriaAttributes = {}
	const isTrue = (raw: string): boolean => raw === 'true'

	const role = node.getAttribute('role')
	if (role) aria.role = role

	const label = node.getAttribute('aria-label')
	if (label) aria.ariaLabel = label

	const labelledBy = node.getAttribute('aria-labelledby')
	if (labelledBy) aria.ariaLabelledBy = labelledBy

	const describedBy = node.getAttribute('aria-describedby')
	if (describedBy) aria.ariaDescribedBy = describedBy

	const hidden = node.getAttribute('aria-hidden')
	if (hidden) aria.ariaHidden = isTrue(hidden)

	const expanded = node.getAttribute('aria-expanded')
	if (expanded) aria.ariaExpanded = isTrue(expanded)

	const pressed = node.getAttribute('aria-pressed')
	if (pressed) aria.ariaPressed = pressed === 'mixed' ? 'mixed' : isTrue(pressed)

	const selected = node.getAttribute('aria-selected')
	if (selected) aria.ariaSelected = isTrue(selected)

	const disabled = node.getAttribute('aria-disabled')
	if (disabled) aria.ariaDisabled = isTrue(disabled)

	const required = node.getAttribute('aria-required')
	if (required) aria.ariaRequired = isTrue(required)

	const invalid = node.getAttribute('aria-invalid')
	if (invalid) {
		aria.ariaInvalid = invalid === 'grammar' || invalid === 'spelling' ? invalid : isTrue(invalid)
	}

	// Unlike the other attributes, an unrecognised aria-live value is ignored entirely.
	const live = node.getAttribute('aria-live')
	if (live === 'polite' || live === 'assertive' || live === 'off') aria.ariaLive = live

	const atomic = node.getAttribute('aria-atomic')
	if (atomic) aria.ariaAtomic = isTrue(atomic)

	const busy = node.getAttribute('aria-busy')
	if (busy) aria.ariaBusy = isTrue(busy)

	const current = node.getAttribute('aria-current')
	if (current) aria.ariaCurrent = current

	const controls = node.getAttribute('aria-controls')
	if (controls) aria.ariaControls = controls

	const owns = node.getAttribute('aria-owns')
	if (owns) aria.ariaOwns = owns

	const hasPopup = node.getAttribute('aria-haspopup')
	if (hasPopup) {
		switch (hasPopup) {
			case 'menu':
			case 'listbox':
			case 'tree':
			case 'grid':
			case 'dialog':
				aria.ariaHasPopup = hasPopup
				break
			default:
				aria.ariaHasPopup = isTrue(hasPopup)
		}
	}

	// Every assignment above adds a key, so an empty object means nothing was found.
	return Object.keys(aria).length > 0 ? aria : undefined
}
1216
+
1217
/**
 * Name prefixes of the data-* attributes worth tracking.
 * An attribute is tracked when its full name starts with any entry, so
 * `data-track` also covers names such as `data-track-click`.
 */
const DATA_ATTR_PREFIXES = [
	// Analytics and tracking
	'data-analytics',
	'data-track',
	// Feature flags and variants
	'data-feature',
	'data-variant',
	// Component wiring
	'data-component',
	'data-action',
	// Test hooks
	'data-test',
	'data-testid',
	'data-cy',
	// Generic identity and state
	'data-id',
	'data-type',
	'data-state',
	'data-value',
	'data-index',
	// UI widget behaviour
	'data-target',
	'data-toggle',
	'data-dismiss',
	'data-slide',
	'data-modal',
	'data-tooltip',
	'data-placement',
]

/**
 * Collect an element's tracked data-* attributes for git diff tracking.
 * Attributes named `data-astro-*` or `data-cms-*` are always skipped; of the
 * rest, only names starting with an entry of DATA_ATTR_PREFIXES are kept.
 * Keys in the result omit the leading `data-`.
 * Yields undefined when nothing was kept.
 */
function extractDataAttributes(node: HTMLNode): DataAttributes | undefined {
	const tracked: DataAttributes = {}
	let found = false

	for (const [name, value] of Object.entries(node.attributes || {})) {
		const isDataAttr = name.startsWith('data-')
		// Astro's internal attributes and this package's own CMS markers are noise here.
		const isInternal = name.startsWith('data-astro-') || name.startsWith('data-cms-')
		if (!isDataAttr || isInternal) continue

		if (!DATA_ATTR_PREFIXES.some(prefix => name.startsWith(prefix))) continue

		// Drop the five-character 'data-' prefix for cleaner output.
		tracked[name.slice(5)] = value
		found = true
	}

	return found ? tracked : undefined
}
1278
+
820
1279
  /**
821
1280
  * Extract source context for an element to enable resilient matching.
822
1281
  * This captures information about the element's position in the DOM
package/src/index.ts CHANGED
@@ -128,8 +128,10 @@ export type { CollectionInfo, MarkdownContent, SourceLocation, VariableReference
128
128
  // Re-export types for consumers
129
129
  export { findCollectionSource, parseMarkdownContent } from './source-finder'
130
130
  export type {
131
+ AriaAttributes,
131
132
  AvailableColors,
132
133
  AvailableTextStyles,
134
+ ButtonAttributes,
133
135
  CanonicalUrl,
134
136
  CmsManifest,
135
137
  CmsMarkerOptions,
@@ -140,16 +142,23 @@ export type {
140
142
  ComponentInstance,
141
143
  ComponentProp,
142
144
  ContentConstraints,
145
+ DataAttributes,
143
146
  FieldDefinition,
144
147
  FieldType,
148
+ FormAttributes,
145
149
  GradientClasses,
150
+ IframeAttributes,
146
151
  ImageMetadata,
152
+ InputAttributes,
147
153
  JsonLdEntry,
154
+ LinkAttributes,
148
155
  ManifestEntry,
149
156
  ManifestMetadata,
157
+ MediaAttributes,
150
158
  OpacityClasses,
151
159
  OpenGraphData,
152
160
  PageSeoData,
161
+ SelectAttributes,
153
162
  SeoKeywords,
154
163
  SeoMetaTag,
155
164
  SeoOptions,
@@ -157,6 +166,7 @@ export type {
157
166
  SeoTitle,
158
167
  SourceContext,
159
168
  TailwindColor,
169
+ TextareaAttributes,
160
170
  TextStyleValue,
161
171
  TwitterCardData,
162
172
  } from './types'